Sunil Sarolkar committed on
Commit · 9c63ccc
Fresh reset: clean Space without LFS history

Files changed:
- ISL_Model_parameter.py +662 -0
- app.py +653 -0
- expression_mapping.py +168 -0
- model.py +407 -0
- packages.txt +6 -0
- requirements.txt +22 -0
- util.py +463 -0
ISL_Model_parameter.py
ADDED
@@ -0,0 +1,662 @@
import math

import cv2
import keras
import numpy as np
import torch
from keras.layers import TorchModuleWrapper
from scipy.ndimage import gaussian_filter  # scipy.ndimage.filters is deprecated
from skimage.measure import label

import util


class ISLSignPos(keras.Model):
    """Wraps frozen OpenPose body and hand networks to extract pose keypoints."""

    def __init__(self, pt_body_model, pt_hand_model):
        super().__init__()
        self.pt_body = TorchModuleWrapper(pt_body_model)
        self.pt_body.trainable = False
        self.pt_hand = TorchModuleWrapper(pt_hand_model)
        self.pt_hand.trainable = False
        self.njoint_body = 26  # body25 heatmap channels (25 joints + background)
        self.npaf_body = 52    # body25 part-affinity-field channels

    def call(self, oriImg):
        candidate, subset = self.bodypos(oriImg.cpu().numpy())
        hands_list = util.handDetect(candidate, subset, oriImg.cpu().numpy())
        all_hand_peaks = []
        for x, y, w, is_left in hands_list:
            # Run the hand network on the detected square crop, then shift the
            # peaks back into full-image coordinates (0 means "missing").
            peaks = self.handpos(oriImg.cpu().numpy()[y:y + w, x:x + w, :])
            peaks[:, 0] = np.where(peaks[:, 0] == 0, peaks[:, 0], peaks[:, 0] + x)
            peaks[:, 1] = np.where(peaks[:, 1] == 0, peaks[:, 1], peaks[:, 1] + y)
            all_hand_peaks.append(peaks)
        return (candidate, subset, all_hand_peaks)
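
    # Usage sketch, assuming model.py's bodypose_25_model()/handpose_model()
    # return the pretrained torch modules (frame is a stand-in tensor; a real
    # HWC BGR frame is expected). Run separately, not on import:
    #
    #     from model import bodypose_25_model, handpose_model
    #     pose = ISLSignPos(bodypose_25_model(), handpose_model())
    #     frame = torch.zeros((368, 368, 3), dtype=torch.uint8)
    #     candidate, subset, hand_peaks = pose(frame)
    #     # candidate rows are (x, y, score, id); subset has one row per person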

    def bodypos(self, oriImg):
        model_type = 'body25'
        scale_search = [0.5]
        boxsize = 368
        stride = 8
        padValue = 128
        thre1 = 0.1
        thre2 = 0.05
        multiplier = [x * boxsize / oriImg.shape[0] for x in scale_search]
        heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], self.njoint_body))
        paf_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], self.npaf_body))

        for m in range(len(multiplier)):
            scale = multiplier[m]
            imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
            imageToTest_padded, pad = util.padRightDownCorner(imageToTest, stride, padValue)
            im = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5
            im = np.ascontiguousarray(im)

            data = torch.from_numpy(im).float()
            if torch.cuda.is_available():
                data = data.cuda()
            with torch.no_grad():
                Mconv7_stage6_L1, Mconv7_stage6_L2 = self.pt_body(data)
            Mconv7_stage6_L1 = Mconv7_stage6_L1.cpu().numpy()
            Mconv7_stage6_L2 = Mconv7_stage6_L2.cpu().numpy()

            heatmap = np.transpose(np.squeeze(Mconv7_stage6_L2), (1, 2, 0))  # output 1 is heatmaps
            heatmap = cv2.resize(heatmap, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
            heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
            heatmap = cv2.resize(heatmap, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)

            paf = np.transpose(np.squeeze(Mconv7_stage6_L1), (1, 2, 0))  # output 0 is PAFs
            paf = cv2.resize(paf, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
            paf = paf[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
            paf = cv2.resize(paf, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)

            # Average the maps over scales. (The original `heatmap_avg +=
            # heatmap_avg + ...` double-counted; harmless with a single scale,
            # wrong for several.)
            heatmap_avg += heatmap / len(multiplier)
            paf_avg += paf / len(multiplier)

        all_peaks = []
        peak_counter = 0

        for part in range(self.njoint_body - 1):
            map_ori = heatmap_avg[:, :, part]
            one_heatmap = gaussian_filter(map_ori, sigma=3)

            map_left = np.zeros(one_heatmap.shape)
            map_left[1:, :] = one_heatmap[:-1, :]
            map_right = np.zeros(one_heatmap.shape)
            map_right[:-1, :] = one_heatmap[1:, :]
            map_up = np.zeros(one_heatmap.shape)
            map_up[:, 1:] = one_heatmap[:, :-1]
            map_down = np.zeros(one_heatmap.shape)
            map_down[:, :-1] = one_heatmap[:, 1:]

            peaks_binary = np.logical_and.reduce(
                (one_heatmap >= map_left, one_heatmap >= map_right, one_heatmap >= map_up, one_heatmap >= map_down, one_heatmap > thre1))
            peaks = list(zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0]))  # note reverse
            peaks_with_score = [x + (map_ori[x[1], x[0]],) for x in peaks]
            peak_id = range(peak_counter, peak_counter + len(peaks))
            peaks_with_score_and_id = [peaks_with_score[i] + (peak_id[i],) for i in range(len(peak_id))]

            all_peaks.append(peaks_with_score_and_id)
            peak_counter += len(peaks)

        if model_type == 'body25':
            # find connection in the specified sequence
            limbSeq = [[1, 0], [1, 2], [2, 3], [3, 4], [1, 5], [5, 6], [6, 7], [1, 8], [8, 9], [9, 10],
                       [10, 11], [8, 12], [12, 13], [13, 14], [0, 15], [0, 16], [15, 17], [16, 18],
                       [11, 24], [11, 22], [14, 21], [14, 19], [22, 23], [19, 20]]
            # the middle joints heatmap correspondence
            mapIdx = [[30, 31], [14, 15], [16, 17], [18, 19], [22, 23], [24, 25], [26, 27], [0, 1], [6, 7],
                      [2, 3], [4, 5], [8, 9], [10, 11], [12, 13], [32, 33], [34, 35], [36, 37], [38, 39],
                      [50, 51], [46, 47], [44, 45], [40, 41], [48, 49], [42, 43]]
        else:
            # find connection in the specified sequence, center 29 is in position 15
            limbSeq = [[1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7], [1, 8], [8, 9],
                       [9, 10], [1, 11], [11, 12], [12, 13], [1, 0], [0, 14], [14, 16],
                       [0, 15], [15, 17], [2, 16], [5, 17]]
            # the middle joints heatmap correspondence
            mapIdx = [[12, 13], [20, 21], [14, 15], [16, 17], [22, 23], [24, 25], [0, 1], [2, 3],
                      [4, 5], [6, 7], [8, 9], [10, 11], [28, 29], [30, 31], [34, 35], [32, 33],
                      [36, 37], [18, 19], [26, 27]]

        connection_all = []
        special_k = []
        mid_num = 10

        for k in range(len(mapIdx)):
            score_mid = paf_avg[:, :, mapIdx[k]]
            candA = all_peaks[limbSeq[k][0]]
            candB = all_peaks[limbSeq[k][1]]

            nA = len(candA)
            nB = len(candB)
            indexA, indexB = limbSeq[k]
            if (nA != 0 and nB != 0):
                connection_candidate = []
                for i in range(nA):
                    for j in range(nB):
                        vec = np.subtract(candB[j][:2], candA[i][:2])
                        norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1])
                        norm = max(0.001, norm)
                        vec = np.divide(vec, norm)

                        startend = list(zip(np.linspace(candA[i][0], candB[j][0], num=mid_num),
                                            np.linspace(candA[i][1], candB[j][1], num=mid_num)))

                        vec_x = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 0]
                                          for I in range(len(startend))])
                        vec_y = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 1]
                                          for I in range(len(startend))])

                        score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(vec_y, vec[1])
                        score_with_dist_prior = sum(score_midpts) / len(score_midpts) + min(
                            0.5 * oriImg.shape[0] / norm - 1, 0)
                        criterion1 = len(np.nonzero(score_midpts > thre2)[0]) > 0.8 * len(score_midpts)
                        criterion2 = score_with_dist_prior > 0
                        if criterion1 and criterion2:
                            connection_candidate.append(
                                [i, j, score_with_dist_prior, score_with_dist_prior + candA[i][2] + candB[j][2]])

                connection_candidate = sorted(connection_candidate, key=lambda x: x[2], reverse=True)
                connection = np.zeros((0, 5))
                for c in range(len(connection_candidate)):
                    i, j, s = connection_candidate[c][0:3]
                    if (i not in connection[:, 3] and j not in connection[:, 4]):
                        connection = np.vstack([connection, [candA[i][3], candB[j][3], s, i, j]])
                        if (len(connection) >= min(nA, nB)):
                            break

                connection_all.append(connection)
            else:
                special_k.append(k)
                connection_all.append([])

        # last number in each row is the total parts number of that person
        # the second last number in each row is the score of the overall configuration
        subset = -1 * np.ones((0, self.njoint_body + 1))
        candidate = np.array([item for sublist in all_peaks for item in sublist])

        for k in range(len(mapIdx)):
            if k not in special_k:
                partAs = connection_all[k][:, 0]
                partBs = connection_all[k][:, 1]
                indexA, indexB = np.array(limbSeq[k])

                for i in range(len(connection_all[k])):
                    found = 0
                    subset_idx = [-1, -1]
                    for j in range(len(subset)):
                        if subset[j][indexA] == partAs[i] or subset[j][indexB] == partBs[i]:
                            subset_idx[found] = j
                            found += 1

                    if found == 1:
                        j = subset_idx[0]
                        if subset[j][indexB] != partBs[i]:
                            subset[j][indexB] = partBs[i]
                            subset[j][-1] += 1
                            subset[j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
                    elif found == 2:  # if found 2 and disjoint, merge them
                        j1, j2 = subset_idx
                        membership = ((subset[j1] >= 0).astype(int) + (subset[j2] >= 0).astype(int))[:-2]
                        if len(np.nonzero(membership == 2)[0]) == 0:  # merge
                            subset[j1][:-2] += (subset[j2][:-2] + 1)
                            subset[j1][-2:] += subset[j2][-2:]
                            subset[j1][-2] += connection_all[k][i][2]
                            subset = np.delete(subset, j2, 0)
                        else:  # same handling as found == 1
                            subset[j1][indexB] = partBs[i]
                            subset[j1][-1] += 1
                            subset[j1][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]

                    # if no partA is found in the subset, create a new row
                    elif not found and k < self.njoint_body - 2:
                        row = -1 * np.ones(self.njoint_body + 1)
                        row[indexA] = partAs[i]
                        row[indexB] = partBs[i]
                        row[-1] = 2
                        row[-2] = sum(candidate[connection_all[k][i, :2].astype(int), 2]) + connection_all[k][i][2]
                        subset = np.vstack([subset, row])
        # delete subset rows with too few parts or a low average score
        deleteIdx = []
        for i in range(len(subset)):
            if subset[i][-1] < 4 or subset[i][-2] / subset[i][-1] < 0.4:
                deleteIdx.append(i)
        subset = np.delete(subset, deleteIdx, axis=0)

        # subset: rows index into candidate; the second-to-last column is the
        # total score, the last column the number of parts found for that person
        # candidate: x, y, score, id
        return candidate, subset
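
    # Sketch of the PAF pair-scoring step above on a toy field (illustrative,
    # standalone; the field is all unit vectors pointing +x, so a horizontal
    # limb scores perfectly). Run separately, not on import:
    #
    #     paf = np.zeros((100, 100, 2)); paf[..., 0] = 1.0
    #     a, b = np.array([10.0, 50.0]), np.array([60.0, 50.0])      # (x, y)
    #     vec = b - a; norm = max(0.001, float(np.hypot(*vec))); vec = vec / norm
    #     xs = np.linspace(a[0], b[0], 10); ys = np.linspace(a[1], b[1], 10)
    #     score_midpts = paf[np.round(ys).astype(int), np.round(xs).astype(int)] @ vec
    #     score = score_midpts.mean() + min(0.5 * 100 / norm - 1, 0)  # -> 1.0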

    def handpos(self, oriImg):
        scale_search = [0.5, 1.0, 1.5, 2.0]
        boxsize = 368
        stride = 8
        padValue = 128
        thre = 0.05
        multiplier = [x * boxsize / oriImg.shape[0] for x in scale_search]
        heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 22))

        for m in range(len(multiplier)):
            scale = multiplier[m]
            imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
            imageToTest_padded, pad = util.padRightDownCorner(imageToTest, stride, padValue)
            im = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5
            im = np.ascontiguousarray(im)

            data = torch.from_numpy(im).float()
            if torch.cuda.is_available():
                data = data.cuda()
            with torch.no_grad():
                output = self.pt_hand(data).cpu().numpy()

            # extract outputs, resize, and remove padding
            heatmap = np.transpose(np.squeeze(output), (1, 2, 0))
            heatmap = cv2.resize(heatmap, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
            heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
            heatmap = cv2.resize(heatmap, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)

            heatmap_avg += heatmap / len(multiplier)

        all_peaks = []
        for part in range(21):
            map_ori = heatmap_avg[:, :, part]
            one_heatmap = gaussian_filter(map_ori, sigma=3)
            binary = np.ascontiguousarray(one_heatmap > thre, dtype=np.uint8)
            # every value is below the threshold: mark the keypoint as missing
            if np.sum(binary) == 0:
                all_peaks.append([0, 0])
                continue
            label_img, label_numbers = label(binary, return_num=True, connectivity=binary.ndim)
            max_index = np.argmax([np.sum(map_ori[label_img == i]) for i in range(1, label_numbers + 1)]) + 1
            label_img[label_img != max_index] = 0
            map_ori[label_img == 0] = 0

            y, x = util.npmax(map_ori)
            all_peaks.append([x, y])
        return np.array(all_peaks)
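
# Sketch of handpos()'s peak extraction on a synthetic heatmap (standalone;
# smooth, threshold, keep the heaviest connected blob, take its argmax).
# Run separately, not on import:
#
#     heatmap = np.zeros((50, 50)); heatmap[10, 10] = 0.3; heatmap[30, 40] = 1.0
#     smoothed = gaussian_filter(heatmap, sigma=3)
#     binary = (smoothed > 0.001).astype(np.uint8)
#     lab, n = label(binary, return_num=True, connectivity=2)
#     best = np.argmax([smoothed[lab == i].sum() for i in range(1, n + 1)]) + 1
#     masked = np.where(lab == best, smoothed, 0)
#     y, x = np.unravel_index(np.argmax(masked), masked.shape)   # -> 30, 40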

class ISLSignPosTranslator(ISLSignPos):
    """Extends ISLSignPos with an LSTM translation head that maps a window of
    per-frame pose features to a sign-class distribution."""

    def __init__(self, body_model, hand_model, translation_model):
        super().__init__(body_model, hand_model)
        self.model_type = 'body25'
        self.translation_layer = translation_model

    def call(self, window):
        window_size = 20
        window_features = []
        # The padding frame must match the 156-element per-frame feature vector
        # (the original np.zeros((1, 156)) produced a ragged array on reshape).
        blank_frame = np.zeros(156)
        for idx, frame in enumerate(window.cpu()):
            candidate, subset = self.bodypos(frame.cpu().numpy())
            hands_list = util.handDetect(candidate, subset, frame.cpu().numpy())
            all_hand_peaks = []
            for x, y, w, is_left in hands_list:
                peaks = self.handpos(frame.cpu().numpy()[y:y + w, x:x + w, :])
                peaks[:, 0] = np.where(peaks[:, 0] == 0, peaks[:, 0], peaks[:, 0] + x)
                peaks[:, 1] = np.where(peaks[:, 1] == 0, peaks[:, 1], peaks[:, 1] + y)
                all_hand_peaks.append(peaks)

            bodypose_circles, bodypose_sticks = util.get_bodypose(candidate, subset, self.model_type)
            handpose_edges, handpose_peaks = util.get_handpose(all_hand_peaks)

            feature = self.populate_features(bodypose_circles, handpose_peaks)
            window_features.append(feature)

        # Pad short clips with blank frames up to the fixed window size
        # (window_features is a list, so len() rather than .shape[0]).
        if len(window_features) < window_size:
            for _ in range(window_size - len(window_features)):
                window_features.append(blank_frame)

        return self.translation_layer(np.array(window_features).reshape(1, 20, 156))

    def frame_to_window(self, frame):
        """
        Pushes a single frame's feature vector into the rolling window buffer.

        Note: currently unused by call(); it assumes self.window has been
        initialised elsewhere as a (window_size, 156) array.

        Args:
            frame: A numpy feature vector for one video frame.
        """
        # Shift the window elements by 1 (the oldest frame is dropped)
        self.window[:-1] = self.window[1:]

        # Add the new frame at the end of the window
        self.window[-1] = frame
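
    # The rolling update above behaves like this standalone sketch (the buffer
    # initialisation is assumed, since the class never creates self.window):
    #
    #     window = np.zeros((20, 156))
    #     for t in range(25):
    #         window[:-1] = window[1:]
    #         window[-1] = np.full(156, t)
    #     # window now holds frames 5..24: the most recent 20 of the 25 pushed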

    def populate_features(self, bodypose_circles, handpose_peaks):
        """Flattens body and hand keypoints into the fixed 156-element feature
        vector the translation head expects: 15 body x + 15 body y, then for
        each of two hands 21 x + 21 y + 21 peak scores. Missing slots are 0."""
        feature = []
        for idx in range(15):
            feature.append(bodypose_circles[idx][0] if idx < len(bodypose_circles) else 0)

        for idx in range(15):
            feature.append(bodypose_circles[idx][1] if idx < len(bodypose_circles) else 0)

        for hand_idx in range(2):
            for idx in range(21):
                feature.append(float(handpose_peaks[hand_idx][idx][0]) if idx < len(handpose_peaks[hand_idx]) else 0)

            for idx in range(21):
                feature.append(float(handpose_peaks[hand_idx][idx][1]) if idx < len(handpose_peaks[hand_idx]) else 0)

            for idx in range(21):
                feature.append(float(handpose_peaks[hand_idx][idx][2]) if idx < len(handpose_peaks[hand_idx]) else 0)

        return np.array(feature)
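
    # Quick dimension check for the layout above: 15 + 15 body coordinates
    # plus 2 hands x (21 + 21 + 21) = 30 + 126 = 156, matching the model input.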

    # bodypos() and handpos() are inherited unchanged from ISLSignPos above;
    # the original file repeated both method bodies verbatim inside this class.
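
# End-to-end usage sketch (mirrors the snippet shown on app.py's About page;
# trained weights and a real 20-frame clip are assumed). Run separately:
#
#     from model import bodypose_25_model, handpose_model
#     translator = ISLSignPosTranslator(bodypose_25_model(), handpose_model(),
#                                       translation_model)  # Keras LSTM head
#     window = torch.zeros((20, 368, 368, 3), dtype=torch.uint8)  # stand-in clip
#     probs = translator(window)          # (1, num_classes) softmax scores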
app.py
ADDED
@@ -0,0 +1,653 @@
# Adapted from Augmented Startups' 2021 Face Landmark Streamlit UI template
# (computer-vision tutorials at www.augmentedstartups.info/YouTube)
import streamlit as st

st.write("Booting…")

import os

os.environ["KERAS_BACKEND"] = "torch"  # must be set before keras is imported
import json
import pickle
import platform
import shutil
import subprocess
import tempfile
import time
from typing import NamedTuple

import cv2
import ffmpeg
import keras
import numpy as np
import pandas as pd
from huggingface_hub import hf_hub_download
from keras.layers import LSTM, Dense, Bidirectional, Dropout, Input, BatchNormalization
from keras.models import Sequential
from PIL import Image

import util
from expression_mapping import expression_mapping
from ISL_Model_parameter import ISLSignPosTranslator
from model import handpose_model, bodypose_25_model

st.write("Python:", platform.python_version())
st.write("FFmpeg exists:", shutil.which("ffmpeg"), "FFprobe:", shutil.which("ffprobe"))

# Import diagnostics surfaced in the UI while the Space boots
try:
    import cv2
    st.write("OpenCV version:", cv2.__version__)
except Exception as e:
    st.error(f"OpenCV import failed: {e}")

try:
    import torch, keras
    st.write("Torch:", torch.__version__, "Keras:", keras.__version__)
except Exception as e:
    st.error(f"Torch/Keras import failed: {e}")

class FFProbeResult(NamedTuple):
    return_code: int
    json: str
    error: str


def ffprobe(file_path) -> FFProbeResult:
    """Runs ffprobe on a media file and returns its JSON stream/format metadata."""
    command_array = ["ffprobe",
                     "-v", "quiet",
                     "-print_format", "json",
                     "-show_format",
                     "-show_streams",
                     file_path]
    result = subprocess.run(command_array, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
    return FFProbeResult(return_code=result.returncode,
                         json=result.stdout,
                         error=result.stderr)
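
# For example, the frame rate can be read off the probe result like this
# (a sketch; "video.mp4" is a placeholder path). Run separately:
#
#     result = ffprobe("video.mp4")
#     if result.return_code == 0:
#         info = json.loads(result.json)
#         video = [s for s in info["streams"] if s["codec_type"] == "video"][0]
#         num, den = map(int, video["r_frame_rate"].split("/"))   # e.g. "25/1"
#         print("fps:", num / den)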

# Per-frame feature layout: 15 body x + 15 body y, then per hand 21 x + 21 y + 21 scores
X_body_test = [f'bodypeaks_x_{i}' for i in range(15)] + [f'bodypeaks_y_{i}' for i in range(15)]
X_hand0_test = [f'hand0peaks_x_{i}' for i in range(21)] + [f'hand0peaks_y_{i}' for i in range(21)] + [f'hand0peaks_peaktxt{i}' for i in range(21)]
X_hand1_test = [f'hand1peaks_x_{i}' for i in range(21)] + [f'hand1peaks_y_{i}' for i in range(21)] + [f'hand1peaks_peaktxt{i}' for i in range(21)]

feature_columns_new = X_body_test + X_hand0_test + X_hand1_test
label_columns = ['Expression_encoded']

@st.cache_resource
def create_timeseries_data(isl_data, feature_columns, label_columns, window_size=20):
    """
    Creates timeseries windows from a per-frame DataFrame, front-padding
    short windows with zero frames.

    Args:
        isl_data (pandas.DataFrame): Per-frame features plus Type,
            Expression_encoded and FileName columns.
        feature_columns (list): Names of the feature columns to extract.
        label_columns (list): Names of the label columns (unused here; the
            label comes from the groupby key).
        window_size (int, optional): Window length. Defaults to 20.

    Returns:
        tuple: (X, y) where X is a list of (window_size, n_features) arrays
        and y is the matching list of encoded expressions.
    """

    # Handle empty DataFrame
    if isl_data.empty:
        return [], []

    X = []
    y = []
    for group, file_df in isl_data.groupby(['Type', 'Expression_encoded', 'FileName']):
        expr_types, exprs, filepaths = group
        first_frame = np.zeros((1, 156))
        # Slide a stride-1 window over the file's frames
        for x in [file_df[i:i + window_size] for i in range(0, file_df.shape[0], 1)]:
            if x.shape[0] < window_size:
                # Front-pad short tail windows with zero frames
                X.append(np.concatenate((np.repeat(first_frame, (window_size - x.shape[0]), axis=0), x[feature_columns].values), axis=0))
                y.append(exprs)
                continue

            X.append(x[feature_columns].values)
            y.append(exprs)

    return X, y
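
# Worked example of the padding rule above: 23 frames with window_size=20 give
# 23 stride-1 windows; tail windows run past the end and get front-padded:
#
#     frames = np.ones((23, 156))
#     windows = []
#     for i in range(23):
#         w = frames[i:i + 20]
#         if w.shape[0] < 20:
#             w = np.concatenate((np.zeros((20 - w.shape[0], 156)), w), axis=0)
#         windows.append(w)
#     assert len(windows) == 23 and windows[-1].shape == (20, 156)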

translation_model = None


@st.cache_resource
def get_translator_model():
    """Builds the BiLSTM translation head and loads the trained weights."""
    translation_model = Sequential()
    translation_model.add(Input(shape=(20, 156)))
    translation_model.add(keras.layers.Masking(mask_value=0.))
    translation_model.add(BatchNormalization())
    translation_model.add(Bidirectional(LSTM(32, recurrent_dropout=0.2, return_sequences=True)))

    translation_model.add(Dropout(0.2))
    translation_model.add(Bidirectional(LSTM(32, recurrent_dropout=0.2)))

    translation_model.add(keras.layers.Activation('elu'))
    translation_model.add(Dense(32, use_bias=False, kernel_initializer='he_normal'))

    translation_model.add(BatchNormalization())
    translation_model.add(Dropout(0.2))
    translation_model.add(keras.layers.Activation('elu'))
    translation_model.add(Dense(32, kernel_initializer='he_normal', use_bias=False))

    translation_model.add(BatchNormalization())
    translation_model.add(keras.layers.Activation('elu'))
    translation_model.add(Dropout(0.2))
    translation_model.add(Dense(len(expression_mapping), activation='softmax'))
    translation_model.load_weights('isl_model_final.keras')
    return translation_model
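
# Intended call pattern for the cached builder (a sketch; it assumes
# isl_model_final.keras is present in the Space working directory):
#
#     translator_head = get_translator_model()   # built once, cached by Streamlit
#     isl_translator = ISLSignPosTranslator(bodypose_25_model(), handpose_model(),
#                                           translator_head)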

testing_cleaned = hf_hub_download(
    repo_id="sunilsarolkar/isl-test-data",
    filename="testing_cleaned.csv",
    repo_type="dataset"
)

testing_df = pd.read_csv(testing_cleaned)
test_files = hf_hub_download(
    repo_id="sunilsarolkar/isl-test-data",
    filename="test_files.csv",
    repo_type="dataset"
)
test_files_df = pd.read_csv(test_files)


class Writer():
    """Streams raw BGR frames into an ffmpeg encoder subprocess."""

    def __init__(self, output_file, input_fps, input_framesize, input_pix_fmt,
                 input_vcodec):
        self.ff_proc = (
            ffmpeg
            .input('pipe:',
                   format='rawvideo',
                   pix_fmt="bgr24",
                   s='%sx%s' % (input_framesize[1], input_framesize[0]),
                   r=input_fps)
            .output(output_file, pix_fmt=input_pix_fmt, vcodec=input_vcodec)
            .overwrite_output()
            .run_async(pipe_stdin=True)
        )

    def __call__(self, frame):
        self.ff_proc.stdin.write(frame.tobytes())

    def close(self):
        self.ff_proc.stdin.close()
        self.ff_proc.wait()


st.title('ISL Indian Sign Language translation using LSTM')

st.markdown(
    """
    <style>
    [data-testid="stSidebar"][aria-expanded="true"] > div:first-child {
        width: 350px;
    }
    [data-testid="stSidebar"][aria-expanded="false"] > div:first-child {
        width: 350px;
        margin-left: -350px;
    }
    </style>
    """,
    unsafe_allow_html=True,
)

st.sidebar.title('ISL Sign Language Translation using Openpose')
st.sidebar.subheader('Parameters')
frame_wise_outputs = {}
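
# Minimal sketch of driving Writer (output name, fps and codec are placeholder
# values). Run separately:
#
#     h, w = 480, 640
#     writer = Writer("annotated.mp4", input_fps=25, input_framesize=(h, w),
#                     input_pix_fmt="yuv420p", input_vcodec="libx264")
#     for _ in range(25):                                # one second of frames
#         writer(np.zeros((h, w, 3), dtype=np.uint8))
#     writer.close()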

def weighted_average(nums, weights):
    """Weighted mean; returns 0 when all weights are zero."""
    if sum(weights) == 0:
        return 0
    return sum(x * y for x, y in zip(nums, weights)) / sum(weights)


@st.cache_data
def image_resize(image, width=None, height=None, inter=cv2.INTER_AREA):
    """Resizes an image to the given width or height, preserving aspect ratio."""
    dim = None
    (h, w) = image.shape[:2]

    # if both the width and height are None, return the original image
    if width is None and height is None:
        return image

    if width is None:
        # calculate the ratio from the height and construct the dimensions
        r = height / float(h)
        dim = (int(w * r), height)
    else:
        # calculate the ratio from the width and construct the dimensions
        r = width / float(w)
        dim = (width, int(h * r))

    # resize the image
    resized = cv2.resize(image, dim, interpolation=inter)
    return resized
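
# For instance, a 1920x1080 INCLUDE frame resized to width 640 keeps 16:9:
#
#     small = image_resize(np.zeros((1080, 1920, 3), dtype=np.uint8), width=640)
#     assert small.shape == (360, 640, 3)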

app_mode = st.sidebar.selectbox('Choose the App mode',
                                ['About App', 'Run on Test Videos']
                                )

if app_mode == 'About App':
    st.markdown('In this application we demonstrate a model developed for translating Indian Sign Language (ISL) using an LSTM')
    st.markdown(
        """
        <style>
        [data-testid="stSidebar"][aria-expanded="true"] > div:first-child {
            width: 400px;
        }
        [data-testid="stSidebar"][aria-expanded="false"] > div:first-child {
            width: 400px;
            margin-left: -400px;
        }
        </style>
        """,
        unsafe_allow_html=True,
    )

    st.markdown('''
# Dataset Used \n
This model is trained on the [INCLUDE](https://zenodo.org/records/4010759) dataset. \n

### Key statistics for the dataset are as follows:

    +-----------------------+-----------------+
    | Characteristics       | INCLUDE-DATASET |
    +-----------------------+-----------------+
    | Categories            | 15              |
    | Words                 | 263             |
    | Videos                | 4287            |
    | Avg Videos per class  | 16.3            |
    | Avg Video Length      | 2.57s           |
    | Min Video Length      | 1.28s           |
    | Max Video Length      | 6.16s           |
    | Frame Rate            | 25fps           |
    | Resolution            | 1920x1080       |
    +-----------------------+-----------------+

#### Size of each category

    +--------------------+-------------------+------------------+
    | Category           | Number of Classes | Number of Videos |
    +--------------------+-------------------+------------------+
    | Adjectives         | 59                | 791              |
    | Animals            | 8                 | 166              |
    | Clothes            | 10                | 198              |
    | Colours            | 11                | 222              |
    | Days and Time      | 22                | 306              |
    | Electronics        | 10                | 140              |
    | Greetings          | 9                 | 185              |
    | Means of Transport | 9                 | 186              |
    | Objects at Home    | 27                | 379              |
    | Occupations        | 16                | 225              |
    | People             | 26                | 513              |
    | Places             | 19                | 399              |
    | Pronouns           | 8                 | 168              |
    | Seasons            | 6                 | 85               |
    | Society            | 23                | 324              |
    | Total              | 263 classes       | 4287 videos      |
    +--------------------+-------------------+------------------+

Below is the count of videos we were able to process (1986 of 4287). We processed a limited set of records due to time/compute constraints.

''')

    image = np.array(Image.open('eda/categories_processed.png'))
    st.image(image)
    st.markdown('''
#### Below is the count of videos per label for each DataFrame
''')
    image = np.array(Image.open('eda/distribution_of_data.png'))
    st.image(image)

    st.markdown('''
### Data Pipeline
''')

    image = np.array(Image.open('DataPipeline.png'))
    st.image(image)
    st.markdown('''
### Model structure
```
translation_model = Sequential()
translation_model.add(Input(shape=(20, 156)))
translation_model.add(keras.layers.Masking(mask_value=0.))
translation_model.add(BatchNormalization())
translation_model.add(Bidirectional(LSTM(32, recurrent_dropout=0.2, return_sequences=True)))

translation_model.add(Dropout(0.2))
translation_model.add(Bidirectional(LSTM(32, recurrent_dropout=0.2)))

translation_model.add(keras.layers.Activation('elu'))
translation_model.add(Dense(32, use_bias=False, kernel_initializer='he_normal'))

translation_model.add(BatchNormalization())
translation_model.add(Dropout(0.2))
translation_model.add(keras.layers.Activation('elu'))
translation_model.add(Dense(32, kernel_initializer='he_normal', use_bias=False))

translation_model.add(BatchNormalization())
translation_model.add(keras.layers.Activation('elu'))
translation_model.add(Dropout(0.2))
translation_model.add(Dense(len(expression_mapping), activation='softmax'))
isl_translator = ISLSignPosTranslator(bodypose_25_model(), handpose_model(), translation_model)
```

Total params: 82,679 (322.96 KB)
Trainable params: 82,239 (321.25 KB)
Non-trainable params: 440 (1.72 KB)
''')
    image = np.array(Image.open('model-graph.png'))
    st.image(image)
    st.markdown('''
# Training
[Tensorboard](https://huggingface.co/cdsteameight/ISL-SignLanguageTranslation/tensorboard)
''')
+
elif app_mode =='Run on Test Videos':
|
| 392 |
+
# placeholder = st.empty()
|
| 393 |
+
category = st.sidebar.selectbox('Choose Category',
|
| 394 |
+
np.sort(test_files_df['Category'].unique(), axis=-1, kind='mergesort'))
|
| 395 |
+
# print(category)
|
| 396 |
+
mask = (test_files_df['Category']==category)
|
| 397 |
+
test_files_df_category=test_files_df[mask]
|
| 398 |
+
cls = st.sidebar.selectbox('Choose Class',
|
| 399 |
+
np.sort(test_files_df_category['Class'].unique(), axis=-1, kind='mergesort')
|
| 400 |
+
)
|
| 401 |
+
mask = (test_files_df['Class']==cls)
|
| 402 |
+
filename = st.sidebar.selectbox('Choose File',
|
| 403 |
+
np.sort(test_files_df_category[mask]['Filename'].unique(), axis=-1, kind='mergesort')
|
| 404 |
+
)
|
| 405 |
+
# print(f'test/{category}/{cls}/{filename}')
|
| 406 |
+
# mask = (include_df['Filepath'].str.contains(key[0])) & (include_df['type']==key[2]) & (include_df['expression']==key[1])
|
| 407 |
+
# stframe = st.empty()
|
| 408 |
+
|
| 409 |
+
if st.sidebar.button("Start", type="primary"):
|
| 410 |
+
mask = (testing_df['FileName'] == filename) & (testing_df['Type']==category)& (testing_df['Expression']==cls)
|
| 411 |
+
# filtered_df = current_test_df.sort_
|
| 412 |
+
|
| 413 |
+
window_size=20
|
| 414 |
+
current_test_df=testing_df[mask]
|
| 415 |
+
X_test_filtered,y_test_filtered = create_timeseries_data(current_test_df,feature_columns_new,label_columns,window_size=window_size)
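        # Assumption: `create_timeseries_data` (defined earlier in app.py) slides a
        # window of `window_size` frames over the per-frame keypoint features in
        # `current_test_df` and returns one (window_size, n_features) sample per
        # window position, together with the matching labels.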
        X_test_filtered = np.array(X_test_filtered)

        st.set_option('deprecation.showfileUploaderEncoding', False)

        st.sidebar.markdown('---')
        st.markdown(
            """
            <style>
            [data-testid="stSidebar"][aria-expanded="true"] > div:first-child {
                width: 400px;
            }
            [data-testid="stSidebar"][aria-expanded="false"] > div:first-child {
                width: 400px;
                margin-left: -400px;
            }
            </style>
            """,
            unsafe_allow_html=True,
        )

        st.sidebar.markdown('---')

        st.markdown('## Output')

        runtime_progress = st.empty()

        with runtime_progress.container():
            df1 = pd.DataFrame([['--', '--']], columns=['Frames Processed', 'Detected Class'])
            my_table = st.table(df1)

        view = st.empty()

        st.markdown("<hr/>", unsafe_allow_html=True)
        stframes = st.empty()

        vid_file = hf_hub_download(
            repo_id="sunilsarolkar/isl-test-data",
            filename=f'test/{category}/{cls}/{filename}',
            repo_type="dataset"
        )

        vid = cv2.VideoCapture(vid_file)

        ffprobe_result = ffprobe(vid_file)
        info = json.loads(ffprobe_result.json)
        videoinfo = [i for i in info["streams"] if i["codec_type"] == "video"][0]
        input_fps = videoinfo["avg_frame_rate"]
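        # ffprobe reports avg_frame_rate as a fraction string (e.g. "30000/1001");
        # it is passed to the Writer unchanged, since ffmpeg accepts fractional rates.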
        input_pix_fmt = videoinfo["pix_fmt"]
        input_vcodec = videoinfo["codec_name"]
        postfix = info["format"]["format_name"].split(",")[0]

        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps_input = int(vid.get(cv2.CAP_PROP_FPS))

        fps = 0
        i = 0

        totalFrames = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
        window = []

        prevTime = 0

        with tempfile.NamedTemporaryFile(suffix=f'.{postfix}', delete=False) as tfflie:
            output_file = tfflie.name

        writer = None
        weighted_avg_dict = {}

        idx = 0

        for _, row in current_test_df.iterrows():
            if vid.isOpened():
                ret, frame = vid.read()
                if not ret:
                    break

                if len(window) < window_size:
                    canvas = util.drawStickmodel(frame, eval(row['bodypose_circles']), eval(row['bodypose_sticks']), eval(row['handpose_edges']), eval(row['handpose_peaks']))
                    canvas_with_plot = util.draw_bar_plot_below_image(canvas, {}, f'Prediction bar plot - Frame number {idx+1} [** no predictions]', canvas)
                    canvas_with_plot = util.draw_bar_plot_below_image(canvas_with_plot, weighted_avg_dict, f'Weighted avg - Frame number {idx+1} [** no predictions]', canvas)
                    canvas_with_plot = util.add_padding_to_bottom(canvas_with_plot, (255, 255, 255), 100)  # adds padding at the bottom

                    if writer is None:
                        input_framesize = canvas_with_plot.shape[:2]
                        writer = Writer(output_file, input_fps, input_framesize, input_pix_fmt,
                                        input_vcodec)

                    writer(canvas_with_plot)
                    with runtime_progress.container():
                        df1 = pd.DataFrame([[f'{idx+1}/{current_test_df.shape[0]}', '<model will output after 20 frames>']], columns=['Frames Processed', 'Detected Class'])
                        my_table = st.table(df1)
                    window.append(frame)
                    with view.container():
                        st.image(canvas_with_plot, channels='BGR', use_column_width=True)
                else:
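                    # Slide the 20-frame window forward by one: drop the oldest
                    # frame and append the newest, so each iteration scores the
                    # most recent `window_size` frames.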
                    window[:-1] = window[1:]
                    window[-1] = frame
                    translation_model = get_translator_model()

                    encoded_translation = translation_model(X_test_filtered[idx-20].reshape(1, X_test_filtered[idx-20].shape[0], X_test_filtered[idx-20].shape[1]))
                    encoded_translation = encoded_translation[0].cpu().detach().numpy()

                    top_3_probs = encoded_translation.argsort()[-3:][::-1]  # indices of the top-3 probabilities (descending)
                    top_3_categories = [expression_mapping[i] for i in top_3_probs]  # map indices to expression names
                    top_3_values = encoded_translation[top_3_probs]  # corresponding probabilities
                    for category, prob in zip(top_3_categories, top_3_values):
                        if category not in frame_wise_outputs:
                            frame_wise_outputs[category] = []
                        frame_wise_outputs[category].append(prob)
                    current_prob = {}

                    for category, prob in zip(top_3_categories, top_3_values):
                        current_prob[category] = prob
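
                    # `weighted_average` is a helper assumed to be defined earlier
                    # in app.py. Since every weight below equals
                    # len(frame_wise_outputs[key]), the weighted average reduces to
                    # a plain mean of all top-3 probabilities seen so far per class.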
                    for key in frame_wise_outputs:
                        weighted_avg_dict[key] = weighted_average(frame_wise_outputs[key], [len(frame_wise_outputs[key]) for i in range(len(frame_wise_outputs[key]))])

                    sorted_dict = dict(sorted(weighted_avg_dict.items(), key=lambda item: item[1], reverse=True))
                    canvas = util.drawStickmodel(frame, eval(row['bodypose_circles']), eval(row['bodypose_sticks']), eval(row['handpose_edges']), eval(row['handpose_peaks']))
                    canvas_with_plot = util.draw_bar_plot_below_image(canvas, current_prob, f'Prediction at frame window ({idx-20+1}-{idx+1})', canvas)
                    canvas_with_plot = util.draw_bar_plot_below_image(canvas_with_plot, weighted_avg_dict, f'Weighted avg till window {idx+1}', canvas)
                    canvas_with_plot = util.add_padding_to_bottom(canvas_with_plot, (255, 255, 255), 100)
                    writer(canvas_with_plot)

                    currTime = time.time()
                    fps = 1 / (currTime - prevTime)
                    prevTime = currTime

                    # Dashboard
                    max_prob = float('-inf')  # initialize with negative infinity
                    max_key = None
                    for exp, prob in weighted_avg_dict.items():
                        if prob > max_prob:
                            max_prob = prob
                            max_key = exp
                    with runtime_progress.container():
                        df1 = pd.DataFrame([[f'{idx+1}/{current_test_df.shape[0]}', f'{max_key} ({max_prob*100:.2f}%)']], columns=['Frames Processed', 'Detected Class'])
                        my_table = st.table(df1)
                    with view.container():
                        st.image(canvas_with_plot, channels='BGR', use_column_width=True)

            idx = idx + 1

        with view.container():
            writer.close()
            output_video = open(output_file, 'rb')
            out_bytes = output_video.read()
            st.video(out_bytes)

        print(f'Output file - {output_file}')
        cv2.destroyAllWindows()
        vid.release()

expression_mapping.py
ADDED
@@ -0,0 +1,168 @@
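
# Maps the model's integer class indices (softmax outputs) to ISL expression
# labels; the inverse label-to-index mapping used for encoding can be derived
# with {label: index for index, label in expression_mapping.items()}.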
expression_mapping = {
    107: "alive",
    58: "Nice",
    8: "Beautiful",
    115: "dead",
    120: "famous",
    122: "female",
    51: "Mean",
    21: "Deaf",
    111: "clean",
    117: "dirty",
    123: "flat",
    110: "cheap",
    119: "expensive",
    116: "deep",
    99: "Ugly",
    114: "curved",
    12: "Blind",
    142: "poor",
    138: "male",
    126: "hard",
    133: "light",
    137: "low",
    113: "cool",
    144: "rich",
    109: "big large",
    108: "bad",
    112: "cold",
    135: "loose",
    121: "fast",
    141: "old",
    130: "high",
    118: "dry",
    145: "sad",
    131: "hot",
    125: "happy",
    129: "heavy",
    128: "healthy",
    124: "good",
    146: "shallow",
    153: "strong",
    161: "weak",
    157: "thin",
    158: "tight",
    136: "loud",
    139: "narrow",
    134: "long",
    156: "thick",
    148: "short",
    152: "soft",
    150: "slow",
    151: "small little",
    149: "sick",
    154: "tall",
    140: "new",
    143: "quiet",
    95: "Today",
    163: "wide",
    159: "warm",
    96: "Tomorrow",
    162: "wet",
    1: "Afternoon",
    27: "Evening",
    56: "Morning",
    59: "Night",
    166: "young",
    53: "Minute",
    38: "Hour",
    88: "Sunday",
    55: "Month",
    94: "Time",
    70: "Pleased",
    63: "Paper",
    105: "Year",
    80: "Second",
    32: "Gift",
    102: "Week",
    43: "Key",
    48: "Lock",
    4: "Bag",
    106: "Yesterday",
    7: "Bathroom",
    15: "Card",
    66: "Pen",
    45: "Letter",
    9: "Bed",
    2: "Alright",
    67: "Pencil",
    24: "Dream",
    13: "Book",
    44: "Kitchen",
    92: "Telephone",
    23: "Door",
    36: "Hello",
    61: "Page",
    40: "How are you",
    16: "Chair",
    89: "Table",
    97: "Tool",
    68: "Photograph",
    10: "Bedroom",
    103: "Window",
    62: "Paint",
    14: "Box",
    76: "Ring",
    82: "Soap",
    20: "Crowd",
    75: "Restaurant",
    98: "Train Station",
    31: "Friend",
    17: "Child",
    0: "Adult",
    46: "Library",
    39: "House",
    42: "India",
    86: "Street or Road",
    72: "Queen",
    85: "Store or Shop",
    64: "Park",
    77: "School",
    18: "City",
    49: "Market",
    60: "Office",
    132: "it",
    41: "I",
    6: "Bank",
    69: "Player",
    147: "she",
    19: "Court",
    155: "they",
    104: "Winter",
    93: "Temple",
    33: "God",
    50: "Marriage",
    29: "Exercise",
    37: "Hospital",
    34: "Ground",
    25: "Election",
    73: "Race (ethnicity)",
    11: "Bill",
    87: "Summer",
    160: "we",
    127: "he",
    22: "Death",
    84: "Spring",
    47: "Location",
    26: "Energy",
    54: "Money",
    28: "Ex. Monsoon",
    165: "you (plural)",
    65: "Peace",
    5: "Ball",
    71: "Price",
    35: "Gun",
    30: "Fall",
    164: "you",
    81: "Sign",
    100: "University",
    83: "Sport",
    74: "Religion",
    101: "War",
    57: "Newspaper",
    3: "Attack",
    90: "Team",
    78: "Science",
    79: "Season",
    52: "Medicine",
    91: "Technology",
}
model.py
ADDED
@@ -0,0 +1,407 @@
import torch
import torch.nn as nn
from collections import OrderedDict


def make_layers(block, no_relu_layers, prelu_layers=[]):
    layers = []

    for layer_name, v in block.items():
        if 'pool' in layer_name:
            layer = nn.MaxPool2d(kernel_size=v[0], stride=v[1],
                                 padding=v[2])
            layers.append((layer_name, layer))
        else:
            # e.g. v = [3, 64, 3, 1, 1] -> in_channels, out_channels, kernel, stride, padding
            conv2d = nn.Conv2d(in_channels=v[0], out_channels=v[1],
                               kernel_size=v[2], stride=v[3],
                               padding=v[4])
            layers.append((layer_name, conv2d))
            if layer_name not in no_relu_layers:
                if layer_name not in prelu_layers:
                    layers.append(('relu_' + layer_name, nn.ReLU(inplace=True)))
                else:
                    layers.append(('prelu' + layer_name[4:], nn.PReLU(v[1])))

    return nn.Sequential(OrderedDict(layers))
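
# Block specs are plain lists: conv entries are [in_channels, out_channels,
# kernel_size, stride, padding], while '*pool*' entries are [kernel_size,
# stride, padding]. For example, ('conv1_1', [3, 64, 3, 1, 1]) becomes
# nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1) followed by a ReLU.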


def make_layers_Mconv(block, no_relu_layers):
    modules = []
    for layer_name, v in block.items():
        layers = []
        if 'pool' in layer_name:
            layer = nn.MaxPool2d(kernel_size=v[0], stride=v[1],
                                 padding=v[2])
            layers.append((layer_name, layer))
        else:
            conv2d = nn.Conv2d(in_channels=v[0], out_channels=v[1],
                               kernel_size=v[2], stride=v[3],
                               padding=v[4])
            layers.append((layer_name, conv2d))
            if layer_name not in no_relu_layers:
                layers.append(('Mprelu' + layer_name[5:], nn.PReLU(v[1])))
        modules.append(nn.Sequential(OrderedDict(layers)))
    return nn.ModuleList(modules)
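
# Unlike make_layers, this returns an nn.ModuleList with one nn.Sequential per
# layer, so the caller can keep every intermediate activation;
# bodypose_25_model._Mconv_forward below concatenates those activations along
# the channel axis (dense connectivity within each Mconv block).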


class bodypose_25_model(nn.Module):
    def __init__(self):
        super(bodypose_25_model, self).__init__()
        # these layers have no relu layer
        no_relu_layers = ['Mconv7_stage0_L1', 'Mconv7_stage0_L2',
                          'Mconv7_stage1_L1', 'Mconv7_stage1_L2',
                          'Mconv7_stage2_L2', 'Mconv7_stage3_L2']
        prelu_layers = ['conv4_2', 'conv4_3_CPM', 'conv4_4_CPM']
        blocks = {}
        block0 = OrderedDict([
            ('conv1_1', [3, 64, 3, 1, 1]),
            ('conv1_2', [64, 64, 3, 1, 1]),
            ('pool1_stage1', [2, 2, 0]),
            ('conv2_1', [64, 128, 3, 1, 1]),
            ('conv2_2', [128, 128, 3, 1, 1]),
            ('pool2_stage1', [2, 2, 0]),
            ('conv3_1', [128, 256, 3, 1, 1]),
            ('conv3_2', [256, 256, 3, 1, 1]),
            ('conv3_3', [256, 256, 3, 1, 1]),
            ('conv3_4', [256, 256, 3, 1, 1]),
            ('pool3_stage1', [2, 2, 0]),
            ('conv4_1', [256, 512, 3, 1, 1]),
            ('conv4_2', [512, 512, 3, 1, 1]),
            ('conv4_3_CPM', [512, 256, 3, 1, 1]),
            ('conv4_4_CPM', [256, 128, 3, 1, 1])
        ])
        self.model0 = make_layers(block0, no_relu_layers, prelu_layers)

        # L2
        # stage0
        blocks['Mconv1_stage0_L2'] = OrderedDict([
            ('Mconv1_stage0_L2_0', [128, 96, 3, 1, 1]),
            ('Mconv1_stage0_L2_1', [96, 96, 3, 1, 1]),
            ('Mconv1_stage0_L2_2', [96, 96, 3, 1, 1])
        ])
        for i in range(2, 6):
            blocks['Mconv%d_stage0_L2' % i] = OrderedDict([
                ('Mconv%d_stage0_L2_0' % i, [288, 96, 3, 1, 1]),
                ('Mconv%d_stage0_L2_1' % i, [96, 96, 3, 1, 1]),
                ('Mconv%d_stage0_L2_2' % i, [96, 96, 3, 1, 1])
            ])
        blocks['Mconv6_7_stage0_L2'] = OrderedDict([
            ('Mconv6_stage0_L2', [288, 256, 1, 1, 0]),
            ('Mconv7_stage0_L2', [256, 52, 1, 1, 0])
        ])
        # stage1~3
        for s in range(1, 4):
            blocks['Mconv1_stage%d_L2' % s] = OrderedDict([
                ('Mconv1_stage%d_L2_0' % s, [180, 128, 3, 1, 1]),
                ('Mconv1_stage%d_L2_1' % s, [128, 128, 3, 1, 1]),
                ('Mconv1_stage%d_L2_2' % s, [128, 128, 3, 1, 1])
            ])
            for i in range(2, 6):
                blocks['Mconv%d_stage%d_L2' % (i, s)] = OrderedDict([
                    ('Mconv%d_stage%d_L2_0' % (i, s), [384, 128, 3, 1, 1]),
                    ('Mconv%d_stage%d_L2_1' % (i, s), [128, 128, 3, 1, 1]),
                    ('Mconv%d_stage%d_L2_2' % (i, s), [128, 128, 3, 1, 1])
                ])
            blocks['Mconv6_7_stage%d_L2' % s] = OrderedDict([
                ('Mconv6_stage%d_L2' % s, [384, 512, 1, 1, 0]),
                ('Mconv7_stage%d_L2' % s, [512, 52, 1, 1, 0])
            ])

        # L1
        # stage0
        blocks['Mconv1_stage0_L1'] = OrderedDict([
            ('Mconv1_stage0_L1_0', [180, 96, 3, 1, 1]),
            ('Mconv1_stage0_L1_1', [96, 96, 3, 1, 1]),
            ('Mconv1_stage0_L1_2', [96, 96, 3, 1, 1])
        ])
        for i in range(2, 6):
            blocks['Mconv%d_stage0_L1' % i] = OrderedDict([
                ('Mconv%d_stage0_L1_0' % i, [288, 96, 3, 1, 1]),
                ('Mconv%d_stage0_L1_1' % i, [96, 96, 3, 1, 1]),
                ('Mconv%d_stage0_L1_2' % i, [96, 96, 3, 1, 1])
            ])
        blocks['Mconv6_7_stage0_L1'] = OrderedDict([
            ('Mconv6_stage0_L1', [288, 256, 1, 1, 0]),
            ('Mconv7_stage0_L1', [256, 26, 1, 1, 0])
        ])
        # stage1
        blocks['Mconv1_stage1_L1'] = OrderedDict([
            ('Mconv1_stage1_L1_0', [206, 128, 3, 1, 1]),
            ('Mconv1_stage1_L1_1', [128, 128, 3, 1, 1]),
            ('Mconv1_stage1_L1_2', [128, 128, 3, 1, 1])
        ])
        for i in range(2, 6):
            blocks['Mconv%d_stage1_L1' % i] = OrderedDict([
                ('Mconv%d_stage1_L1_0' % i, [384, 128, 3, 1, 1]),
                ('Mconv%d_stage1_L1_1' % i, [128, 128, 3, 1, 1]),
                ('Mconv%d_stage1_L1_2' % i, [128, 128, 3, 1, 1])
            ])
        blocks['Mconv6_7_stage1_L1'] = OrderedDict([
            ('Mconv6_stage1_L1', [384, 512, 1, 1, 0]),
            ('Mconv7_stage1_L1', [512, 26, 1, 1, 0])
        ])

        for k in blocks.keys():
            blocks[k] = make_layers_Mconv(blocks[k], no_relu_layers)
        self.models = nn.ModuleDict(blocks)
        for param in self.parameters():
            param.requires_grad = False

    def _Mconv_forward(self, x, models):
        outs = []
        out = x
        for m in models:
            out = m(out)
            outs.append(out)
        return torch.cat(outs, 1)

    def forward(self, x):
        out0 = self.model0(x)
        # L2
        tout = out0
        for s in range(4):
            tout = self._Mconv_forward(tout, self.models['Mconv1_stage%d_L2' % s])
            for v in range(2, 6):
                tout = self._Mconv_forward(tout, self.models['Mconv%d_stage%d_L2' % (v, s)])
            tout = self.models['Mconv6_7_stage%d_L2' % s][0](tout)
            tout = self.models['Mconv6_7_stage%d_L2' % s][1](tout)
            outL2 = tout
            tout = torch.cat([out0, tout], 1)
        # L1 stage0
        tout = self._Mconv_forward(tout, self.models['Mconv1_stage0_L1'])
        for v in range(2, 6):
            tout = self._Mconv_forward(tout, self.models['Mconv%d_stage0_L1' % v])
        tout = self.models['Mconv6_7_stage0_L1'][0](tout)
        tout = self.models['Mconv6_7_stage0_L1'][1](tout)
        outS0L1 = tout
        tout = torch.cat([out0, outS0L1, outL2], 1)
        # L1 stage1
        tout = self._Mconv_forward(tout, self.models['Mconv1_stage1_L1'])
        for v in range(2, 6):
            tout = self._Mconv_forward(tout, self.models['Mconv%d_stage1_L1' % v])
        tout = self.models['Mconv6_7_stage1_L1'][0](tout)
        outS1L1 = self.models['Mconv6_7_stage1_L1'][1](tout)

        return outL2, outS1L1
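
# The two outputs follow the OpenPose BODY_25 layout: the L2 branch
# (52 channels) carries the part-affinity fields, and the final L1 branch
# (26 channels) the keypoint heatmaps (25 body parts plus background).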


class bodypose_model(nn.Module):
    def __init__(self):
        super(bodypose_model, self).__init__()

        # these layers have no relu layer
        no_relu_layers = ['conv5_5_CPM_L1', 'conv5_5_CPM_L2', 'Mconv7_stage2_L1',
                          'Mconv7_stage2_L2', 'Mconv7_stage3_L1', 'Mconv7_stage3_L2',
                          'Mconv7_stage4_L1', 'Mconv7_stage4_L2', 'Mconv7_stage5_L1',
                          'Mconv7_stage5_L2', 'Mconv7_stage6_L1', 'Mconv7_stage6_L2']
        blocks = {}
        block0 = OrderedDict([
            ('conv1_1', [3, 64, 3, 1, 1]),
            ('conv1_2', [64, 64, 3, 1, 1]),
            ('pool1_stage1', [2, 2, 0]),
            ('conv2_1', [64, 128, 3, 1, 1]),
            ('conv2_2', [128, 128, 3, 1, 1]),
            ('pool2_stage1', [2, 2, 0]),
            ('conv3_1', [128, 256, 3, 1, 1]),
            ('conv3_2', [256, 256, 3, 1, 1]),
            ('conv3_3', [256, 256, 3, 1, 1]),
            ('conv3_4', [256, 256, 3, 1, 1]),
            ('pool3_stage1', [2, 2, 0]),
            ('conv4_1', [256, 512, 3, 1, 1]),
            ('conv4_2', [512, 512, 3, 1, 1]),
            ('conv4_3_CPM', [512, 256, 3, 1, 1]),
            ('conv4_4_CPM', [256, 128, 3, 1, 1])
        ])

        # Stage 1
        block1_1 = OrderedDict([
            ('conv5_1_CPM_L1', [128, 128, 3, 1, 1]),
            ('conv5_2_CPM_L1', [128, 128, 3, 1, 1]),
            ('conv5_3_CPM_L1', [128, 128, 3, 1, 1]),
            ('conv5_4_CPM_L1', [128, 512, 1, 1, 0]),
            ('conv5_5_CPM_L1', [512, 38, 1, 1, 0])
        ])

        block1_2 = OrderedDict([
            ('conv5_1_CPM_L2', [128, 128, 3, 1, 1]),
            ('conv5_2_CPM_L2', [128, 128, 3, 1, 1]),
            ('conv5_3_CPM_L2', [128, 128, 3, 1, 1]),
            ('conv5_4_CPM_L2', [128, 512, 1, 1, 0]),
            ('conv5_5_CPM_L2', [512, 19, 1, 1, 0])
        ])
        blocks['block1_1'] = block1_1
        blocks['block1_2'] = block1_2

        self.model0 = make_layers(block0, no_relu_layers)

        # Stages 2 - 6
        for i in range(2, 7):
            blocks['block%d_1' % i] = OrderedDict([
                ('Mconv1_stage%d_L1' % i, [185, 128, 7, 1, 3]),
                ('Mconv2_stage%d_L1' % i, [128, 128, 7, 1, 3]),
                ('Mconv3_stage%d_L1' % i, [128, 128, 7, 1, 3]),
                ('Mconv4_stage%d_L1' % i, [128, 128, 7, 1, 3]),
                ('Mconv5_stage%d_L1' % i, [128, 128, 7, 1, 3]),
                ('Mconv6_stage%d_L1' % i, [128, 128, 1, 1, 0]),
                ('Mconv7_stage%d_L1' % i, [128, 38, 1, 1, 0])
            ])

            blocks['block%d_2' % i] = OrderedDict([
                ('Mconv1_stage%d_L2' % i, [185, 128, 7, 1, 3]),
                ('Mconv2_stage%d_L2' % i, [128, 128, 7, 1, 3]),
                ('Mconv3_stage%d_L2' % i, [128, 128, 7, 1, 3]),
                ('Mconv4_stage%d_L2' % i, [128, 128, 7, 1, 3]),
                ('Mconv5_stage%d_L2' % i, [128, 128, 7, 1, 3]),
                ('Mconv6_stage%d_L2' % i, [128, 128, 1, 1, 0]),
                ('Mconv7_stage%d_L2' % i, [128, 19, 1, 1, 0])
            ])

        for k in blocks.keys():
            blocks[k] = make_layers(blocks[k], no_relu_layers)

        self.model1_1 = blocks['block1_1']
        self.model2_1 = blocks['block2_1']
        self.model3_1 = blocks['block3_1']
        self.model4_1 = blocks['block4_1']
        self.model5_1 = blocks['block5_1']
        self.model6_1 = blocks['block6_1']

        self.model1_2 = blocks['block1_2']
        self.model2_2 = blocks['block2_2']
        self.model3_2 = blocks['block3_2']
        self.model4_2 = blocks['block4_2']
        self.model5_2 = blocks['block5_2']
        self.model6_2 = blocks['block6_2']
        for param in self.parameters():
            param.requires_grad = False

    def forward(self, x):
        out1 = self.model0(x)

        out1_1 = self.model1_1(out1)
        out1_2 = self.model1_2(out1)
        out2 = torch.cat([out1_1, out1_2, out1], 1)

        out2_1 = self.model2_1(out2)
        out2_2 = self.model2_2(out2)
        out3 = torch.cat([out2_1, out2_2, out1], 1)

        out3_1 = self.model3_1(out3)
        out3_2 = self.model3_2(out3)
        out4 = torch.cat([out3_1, out3_2, out1], 1)

        out4_1 = self.model4_1(out4)
        out4_2 = self.model4_2(out4)
        out5 = torch.cat([out4_1, out4_2, out1], 1)

        out5_1 = self.model5_1(out5)
        out5_2 = self.model5_2(out5)
        out6 = torch.cat([out5_1, out5_2, out1], 1)

        out6_1 = self.model6_1(out6)
        out6_2 = self.model6_2(out6)

        return out6_1, out6_2


class handpose_model(nn.Module):
    def __init__(self):
        super(handpose_model, self).__init__()

        # these layers have no relu layer
        no_relu_layers = ['conv6_2_CPM', 'Mconv7_stage2', 'Mconv7_stage3',
                          'Mconv7_stage4', 'Mconv7_stage5', 'Mconv7_stage6']
        # stage 1
        block1_0 = OrderedDict([
            ('conv1_1', [3, 64, 3, 1, 1]),
            ('conv1_2', [64, 64, 3, 1, 1]),
            ('pool1_stage1', [2, 2, 0]),
            ('conv2_1', [64, 128, 3, 1, 1]),
            ('conv2_2', [128, 128, 3, 1, 1]),
            ('pool2_stage1', [2, 2, 0]),
            ('conv3_1', [128, 256, 3, 1, 1]),
            ('conv3_2', [256, 256, 3, 1, 1]),
            ('conv3_3', [256, 256, 3, 1, 1]),
            ('conv3_4', [256, 256, 3, 1, 1]),
            ('pool3_stage1', [2, 2, 0]),
            ('conv4_1', [256, 512, 3, 1, 1]),
            ('conv4_2', [512, 512, 3, 1, 1]),
            ('conv4_3', [512, 512, 3, 1, 1]),
            ('conv4_4', [512, 512, 3, 1, 1]),
            ('conv5_1', [512, 512, 3, 1, 1]),
            ('conv5_2', [512, 512, 3, 1, 1]),
            ('conv5_3_CPM', [512, 128, 3, 1, 1])
        ])

        block1_1 = OrderedDict([
            ('conv6_1_CPM', [128, 512, 1, 1, 0]),
            ('conv6_2_CPM', [512, 22, 1, 1, 0])
        ])

        blocks = {}
        blocks['block1_0'] = block1_0
        blocks['block1_1'] = block1_1

        # stage 2-6
        for i in range(2, 7):
            blocks['block%d' % i] = OrderedDict([
                ('Mconv1_stage%d' % i, [150, 128, 7, 1, 3]),
                ('Mconv2_stage%d' % i, [128, 128, 7, 1, 3]),
                ('Mconv3_stage%d' % i, [128, 128, 7, 1, 3]),
                ('Mconv4_stage%d' % i, [128, 128, 7, 1, 3]),
                ('Mconv5_stage%d' % i, [128, 128, 7, 1, 3]),
                ('Mconv6_stage%d' % i, [128, 128, 1, 1, 0]),
                ('Mconv7_stage%d' % i, [128, 22, 1, 1, 0])
            ])

        for k in blocks.keys():
            blocks[k] = make_layers(blocks[k], no_relu_layers)

        self.model1_0 = blocks['block1_0']
        self.model1_1 = blocks['block1_1']
        self.model2 = blocks['block2']
        self.model3 = blocks['block3']
        self.model4 = blocks['block4']
        self.model5 = blocks['block5']
        self.model6 = blocks['block6']
        for param in self.parameters():
            param.requires_grad = False

    def forward(self, x):
        out1_0 = self.model1_0(x)
        out1_1 = self.model1_1(out1_0)
        concat_stage2 = torch.cat([out1_1, out1_0], 1)
        out_stage2 = self.model2(concat_stage2)
        concat_stage3 = torch.cat([out_stage2, out1_0], 1)
        out_stage3 = self.model3(concat_stage3)
        concat_stage4 = torch.cat([out_stage3, out1_0], 1)
        out_stage4 = self.model4(concat_stage4)
        concat_stage5 = torch.cat([out_stage4, out1_0], 1)
        out_stage5 = self.model5(concat_stage5)
        concat_stage6 = torch.cat([out_stage5, out1_0], 1)
        out_stage6 = self.model6(concat_stage6)
        return out_stage6
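
# The hand network returns a single 22-channel map: heatmaps for the 21 hand
# keypoints plus one background channel, refined over six stages.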
packages.txt
ADDED
@@ -0,0 +1,6 @@
ffmpeg
libgl1
libglib2.0-0
libsm6
libxrender1
libxext6
requirements.txt
ADDED
@@ -0,0 +1,22 @@
opencv_python_headless
streamlit
numpy
Pillow
matplotlib
opencv-python
scipy
scikit-image
tqdm
pandas
torch
torchaudio
torchvision
torchtext
torchdata
av
keras
ffmpeg
ffmpeg-python
seaborn[stats]
huggingface_hub
util.py
ADDED
@@ -0,0 +1,463 @@
import copy
import math

import cv2
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
from matplotlib.figure import Figure


def padRightDownCorner(img, stride, padValue):
    h = img.shape[0]
    w = img.shape[1]

    pad = 4 * [None]
    pad[0] = 0  # up
    pad[1] = 0  # left
    pad[2] = 0 if (h % stride == 0) else stride - (h % stride)  # down
    pad[3] = 0 if (w % stride == 0) else stride - (w % stride)  # right

    img_padded = img
    pad_up = np.tile(img_padded[0:1, :, :] * 0 + padValue, (pad[0], 1, 1))
    img_padded = np.concatenate((pad_up, img_padded), axis=0)
    pad_left = np.tile(img_padded[:, 0:1, :] * 0 + padValue, (1, pad[1], 1))
    img_padded = np.concatenate((pad_left, img_padded), axis=1)
    pad_down = np.tile(img_padded[-2:-1, :, :] * 0 + padValue, (pad[2], 1, 1))
    img_padded = np.concatenate((img_padded, pad_down), axis=0)
    pad_right = np.tile(img_padded[:, -2:-1, :] * 0 + padValue, (1, pad[3], 1))
    img_padded = np.concatenate((img_padded, pad_right), axis=1)

    return img_padded, pad
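
# padRightDownCorner pads the bottom/right edges with `padValue` so that both
# height and width become multiples of `stride`, as required by the
# downsampling pose networks; `pad` records how much was added on each side.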


# transfer a caffe model to pytorch so that the layer names match
def transfer(model, model_weights):
    transfered_model_weights = {}
    for weights_name in model.state_dict().keys():
        if len(weights_name.split('.')) > 4:  # body25
            transfered_model_weights[weights_name] = model_weights['.'.join(
                weights_name.split('.')[3:])]
        else:
            transfered_model_weights[weights_name] = model_weights['.'.join(
                weights_name.split('.')[1:])]
    return transfered_model_weights


# draw the body keypoints and limbs
def draw_bodypose(canvas, candidate, subset, model_type='body25'):
    stickwidth = 4
    if model_type == 'body25':
        limbSeq = [[1, 0], [1, 2], [2, 3], [3, 4], [1, 5], [5, 6], [6, 7], [1, 8], [8, 9], [9, 10],
                   [10, 11], [8, 12], [12, 13], [13, 14], [0, 15], [0, 16], [15, 17], [16, 18],
                   [11, 24], [11, 22], [14, 21], [14, 19], [22, 23], [19, 20]]
        njoint = 25
    else:
        limbSeq = [[1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7], [1, 8], [8, 9],
                   [9, 10], [1, 11], [11, 12], [12, 13], [1, 0], [0, 14], [14, 16],
                   [0, 15], [15, 17], [2, 16], [5, 17]]
        njoint = 18

    colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0],
              [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255],
              [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85], [255, 255, 0], [255, 255, 85], [255, 255, 170],
              [255, 255, 255], [170, 255, 255], [85, 255, 255], [0, 255, 255]]

    for i in range(njoint):
        for n in range(len(subset)):
            index = int(subset[n][i])
            if index == -1:
                continue
            x, y = candidate[index][0:2]
            cv2.circle(canvas, (int(x), int(y)), 4, colors[i], thickness=-1)
    for i in range(njoint - 1):
        for n in range(len(subset)):
            index = subset[n][np.array(limbSeq[i])]
            if -1 in index:
                continue
            cur_canvas = canvas.copy()
            Y = candidate[index.astype(int), 0]
            X = candidate[index.astype(int), 1]
            mX = np.mean(X)
            mY = np.mean(Y)
            length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
            angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
            polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
            cv2.fillConvexPoly(cur_canvas, polygon, colors[i])
            canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0)
    return canvas


# sample subsets: [[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, -1.0, 11.0, 12.0, -1.0, 13.0, 14.0, 15.0, 16.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 26.650803712300775, 17.0]]
# sample candidates: [[983.0, 172.0, 0.8991263508796692, 0.0], [980.0, 352.0, 0.930037796497345, 1.0], [848.0, 342.0, 0.8652207255363464, 2.0], [811.0, 598.0, 0.8107873797416687, 3.0], [806.0, 817.0, 0.7464589476585388, 4.0], [1120.0, 361.0, 0.8538270592689514, 5.0], [1148.0, 601.0, 0.6797391176223755, 6.0], [1149.0, 834.0, 0.5189468264579773, 7.0], [968.0, 757.0, 0.6468111276626587, 8.0], [876.0, 756.0, 0.6387956142425537, 9.0], [854.0, 1072.0, 0.4211728572845459, 10.0], [1057.0, 759.0, 0.6311940550804138, 11.0], [1038.0, 1072.0, 0.38531172275543213, 12.0], [955.0, 146.0, 0.925083339214325, 13.0], [1016.0, 151.0, 0.9023998379707336, 14.0], [909.0, 167.0, 0.9096773862838745, 15.0], [1057.0, 173.0, 0.8605436086654663, 16.0]]
def get_bodypose(candidate, subset, model_type='coco'):
    stickwidth = 4
    if model_type == 'body25':
        limbSeq = [[1, 0], [1, 2], [2, 3], [3, 4], [1, 5], [5, 6], [6, 7], [1, 8], [8, 9], [9, 10],
                   [10, 11], [8, 12], [12, 13], [13, 14], [0, 15], [0, 16], [15, 17], [16, 18],
                   [11, 24], [11, 22], [14, 21], [14, 19], [22, 23], [19, 20]]
        njoint = 25
    else:
        limbSeq = [[1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7], [1, 8], [8, 9],
                   [9, 10], [1, 11], [11, 12], [12, 13], [1, 0], [0, 14], [14, 16],
                   [0, 15], [15, 17], [2, 16], [5, 17]]
        njoint = 18

    x_y_circles = []
    for i in range(njoint):
        for n in range(len(subset)):
            index = int(subset[n][i])
            if index == -1:
                continue
            x, y = candidate[index][0:2]
            x_y_circles.append((x, y))

    x_y_sticks = []
    for i in range(njoint - 1):
        for n in range(len(subset)):
            index = subset[n][np.array(limbSeq[i])]
            if -1 in index:
                continue
            Y = candidate[index.astype(int), 0]
            X = candidate[index.astype(int), 1]
            mX = np.mean(X)
            mY = np.mean(Y)
            length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
            angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
            x_y_sticks.append((mY, mX, angle, length))
    return (x_y_circles, x_y_sticks)


# sample all_hand_peaks: [[[0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [1100, 858], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0]], [[0, 0], [858, 859], [868, 894], [873, 938], [0, 0], [802, 920], [807, 961], [821, 977], [836, 992], [0, 0], [781, 955], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0]]]
def draw_handpose(canvas, all_hand_peaks, show_number=False):
    edges = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10],
             [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]]
    fig = Figure(figsize=plt.figaspect(canvas))

    fig.subplots_adjust(0, 0, 1, 1)
    fig.subplots_adjust(bottom=0, top=1, left=0, right=1)
    bg = FigureCanvas(fig)
    ax = fig.subplots()
    ax.axis('off')
    ax.imshow(canvas)

    width, height = ax.figure.get_size_inches() * ax.figure.get_dpi()

    for peaks in all_hand_peaks:
        for ie, e in enumerate(edges):
            if np.sum(np.all(peaks[e], axis=1) == 0) == 0:
                x1, y1 = peaks[e[0]]
                x2, y2 = peaks[e[1]]
                ax.plot([x1, x2], [y1, y2], color=matplotlib.colors.hsv_to_rgb([ie / float(len(edges)), 1.0, 1.0]))

        for i, keypoint in enumerate(peaks):
            x, y = keypoint
            ax.plot(x, y, 'r.')
            if show_number:
                ax.text(x, y, str(i))
    bg.draw()
    canvas = np.frombuffer(bg.tostring_rgb(), dtype='uint8').reshape(int(height), int(width), 3)
    return canvas


def get_handpose(all_hand_peaks, show_number=False):
    edges = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10],
             [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]]
    export_edges = [[], []]
    export_peaks = [[], []]
    for idx, peaks in enumerate(all_hand_peaks):
        for ie, e in enumerate(edges):
            if np.sum(np.all(peaks[e], axis=1) == 0) == 0:
                x1, y1 = peaks[e[0]]
                x2, y2 = peaks[e[1]]
                export_edges[idx].append((ie, (x1, y1), (x2, y2)))

        for i, keypoint in enumerate(peaks):
            x, y = keypoint
            export_peaks[idx].append((x, y, str(i)))
    return (export_edges, export_peaks)


# the image drawn by opencv is not as good as the matplotlib version
def draw_handpose_by_opencv(canvas, peaks, show_number=False):
    edges = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10],
             [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]]
    for ie, e in enumerate(edges):
        if np.sum(np.all(peaks[e], axis=1) == 0) == 0:
            x1, y1 = peaks[e[0]]
            x2, y2 = peaks[e[1]]
            cv2.line(canvas, (x1, y1), (x2, y2), matplotlib.colors.hsv_to_rgb([ie / float(len(edges)), 1.0, 1.0]) * 255, thickness=2)

    for i, keypoint in enumerate(peaks):
        x, y = keypoint
        cv2.circle(canvas, (x, y), 4, (0, 0, 255), thickness=-1)
        if show_number:
            cv2.putText(canvas, str(i), (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 0), lineType=cv2.LINE_AA)
    return canvas


# detect hands from body pose keypoints
# please refer to https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/src/openpose/hand/handDetector.cpp
def handDetect(candidate, subset, oriImg):
    # right hand: wrist 4, elbow 3, shoulder 2
    # left hand: wrist 7, elbow 6, shoulder 5
    ratioWristElbow = 0.33
    detect_result = []

    image_height, image_width = oriImg.shape[0:2]
    for person in subset.astype(int):
        # skip a hand if any of its three joints is not detected
        has_left = np.sum(person[[5, 6, 7]] == -1) == 0
        has_right = np.sum(person[[2, 3, 4]] == -1) == 0
        if not (has_left or has_right):
            continue
        hands = []
        # left hand
        if has_left:
            left_shoulder_index, left_elbow_index, left_wrist_index = person[[5, 6, 7]]
            x1, y1 = candidate[left_shoulder_index][:2]
            x2, y2 = candidate[left_elbow_index][:2]
            x3, y3 = candidate[left_wrist_index][:2]
            hands.append([x1, y1, x2, y2, x3, y3, True])
        # right hand
        if has_right:
            right_shoulder_index, right_elbow_index, right_wrist_index = person[[2, 3, 4]]
            x1, y1 = candidate[right_shoulder_index][:2]
            x2, y2 = candidate[right_elbow_index][:2]
            x3, y3 = candidate[right_wrist_index][:2]
            hands.append([x1, y1, x2, y2, x3, y3, False])

        for x1, y1, x2, y2, x3, y3, is_left in hands:
            # pos_hand = pos_wrist + ratio * (pos_wrist - pos_elbow)
            #          = (1 + ratio) * pos_wrist - ratio * pos_elbow
            # (mirrors the handRectangle computation in the OpenPose C++ source)
            x = x3 + ratioWristElbow * (x3 - x2)
            y = y3 + ratioWristElbow * (y3 - y2)
            distanceWristElbow = math.sqrt((x3 - x2) ** 2 + (y3 - y2) ** 2)
            distanceElbowShoulder = math.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)
            width = 1.5 * max(distanceWristElbow, 0.9 * distanceElbowShoulder)
            # x-y refers to the center --> offset to the top-left point
            x -= width / 2
            y -= width / 2  # width = height
            # clip boxes that overflow the image
            if x < 0: x = 0
            if y < 0: y = 0
            width1 = width
            width2 = width
            if x + width > image_width: width1 = image_width - x
            if y + width > image_height: width2 = image_height - y
            width = min(width1, width2)
            # drop hand boxes smaller than 20 pixels
            if width >= 20:
                detect_result.append([int(x), int(y), int(width), is_left])

    '''
    return value: [[x, y, w, True if left hand else False]].
    width = height since the network requires a squared input.
    x, y is the coordinate of the top-left corner.
    '''
    return detect_result
|
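+# A minimal usage sketch (hypothetical names): candidate and subset are the body
+# keypoint outputs of the pytorch-openpose Body model, frame a BGR image:
+#
+#   hands = handDetect(candidate, subset, frame)
+#   for x, y, w, is_left in hands:
+#       crop = frame[y:y + w, x:x + w, :]  # square crop fed to the hand network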
+
+def drawStickmodel(oriImg, x_ytupple, x_y_sticks, export_edges, export_peaks):
+    canvas = copy.deepcopy(oriImg)
+
+    colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0],
+              [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255],
+              [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85], [255, 255, 0], [255, 255, 85], [255, 255, 170],
+              [255, 255, 255], [170, 255, 255], [85, 255, 255], [0, 255, 255]]
+    stickwidth = 4
+
+    # each body stick is a filled ellipse centred on the limb midpoint, rotated
+    # by the limb angle, then alpha-blended onto the canvas
+    for idx, (mX, mY, angle, length) in enumerate(x_y_sticks):
+        cur_canvas = canvas.copy()
+        # print(f'new cv2.ellipse2Poly((int({mY}), int({mX})), (int({length} / 2), {stickwidth}), int({angle}), 0, 360, 1)')
+        polygon = cv2.ellipse2Poly((int(mX), int(mY)), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
+        cv2.fillConvexPoly(cur_canvas, polygon, colors[idx])
+        canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0)
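+    # Note (an assumption read off the tuple unpacking above): each x_y_sticks
+    # entry is (mean_x, mean_y, angle_in_degrees, length) for one limb, and there
+    # can be at most len(colors) == 25 sticks, since colors is indexed by position.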
+
+    for idx, (x, y) in enumerate(x_ytupple):
+        cv2.circle(canvas, (int(x), int(y)), 4, colors[idx], thickness=-1)
+
+    ## Handpose
+    fig = Figure(figsize=plt.figaspect(canvas))
+    fig.subplots_adjust(0, 0, 1, 1)
+    fig.subplots_adjust(bottom=0, top=1, left=0, right=1)
+    bg = FigureCanvas(fig)
+    ax = fig.subplots()
+    ax.axis('off')
+    ax.imshow(canvas)
+
+    edges = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10], \
+             [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]]
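+    # edges encodes the standard 21-keypoint OpenPose hand skeleton: keypoint 0 is
+    # the wrist, and each chain of four edges (0-1-2-3-4, 0-5-6-7-8, 0-9-10-11-12,
+    # 0-13-14-15-16, 0-17-18-19-20) is one finger, from thumb to little finger.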
+
+    for both_hand_edges in export_edges:
+        for (ie, (x1, y1), (x2, y2)) in both_hand_edges:
+            # print(f'new ax.plot([{x1}, {x2}], [{y1}, {y2}], color=matplotlib.colors.hsv_to_rgb([ie/float({len(edges)}), 1.0, 1.0]))')
+            ax.plot([x1, x2], [y1, y2], color=matplotlib.colors.hsv_to_rgb([ie / float(len(edges)), 1.0, 1.0]))
+
+    width, height = ax.figure.get_size_inches() * ax.figure.get_dpi()
+
+    for both_hand_peaks in export_peaks:
+        for (x, y, text) in both_hand_peaks:
+            # print(f"new ax.plot({x}, {y}, 'r.')")
+            ax.plot(x, y, 'r.')
+
+    # print(f'NEW width = {width}, height={height}')
+    bg.draw()
+
+    # rasterize the Agg canvas back into a NumPy image
+    # (np.frombuffer replaces the removed np.fromstring)
+    canvas = np.frombuffer(bg.tostring_rgb(), dtype='uint8').reshape(int(height), int(width), 3)
+
+    ####
+
+    # cv2.imwrite('C:/Users/spsar/Downloads/MVI_5177.MOV-transformed/MVI_5177.MOV-GaussianBlur/MVI_5177.MOV-14-modified.jpg', canvas)
+    return cv2.resize(canvas, (math.ceil(width), math.ceil(height)))
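+# Note: FigureCanvasAgg.tostring_rgb() is deprecated in newer Matplotlib
+# releases; a sketch of the RGBA-buffer alternative (drop the alpha channel):
+#
+#   buf = np.asarray(bg.buffer_rgba())  # (H, W, 4) uint8
+#   canvas = buf[:, :, :3].copy()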
+
+def draw_bar_plot_below_image(image, predictions, title, origImg):
+    """
+    Draws a bar plot of predictions below an image using OpenCV and Matplotlib.
+
+    Args:
+        image (numpy.ndarray): The image to display.
+        predictions (dict): Mapping from class label to prediction probability.
+        title (str): Title shown above the bar plot.
+        origImg (numpy.ndarray): Original image used to size the plot figure.
+    """
+    fig, ax = plt.subplots(figsize=(origImg.shape[1] / 100, origImg.shape[0] / 200), dpi=100)
+    plt.title(title)
+    # Plot the bar chart of label probabilities
+    labels = list(predictions.keys())
+    probabilities = list(predictions.values())
+
+    # Create a Seaborn bar plot
+    sns.barplot(x=labels, y=probabilities, ax=ax)  # Default color palette used
+    plt.close(fig)  # Close plot to avoid memory leaks
+    fig.canvas.draw()
+    # Convert the plot to a NumPy array for manipulation
+    plot_image = np.array(fig.canvas.renderer.buffer_rgba())[:, :, :3]  # Remove alpha channel
+
+    # Resize the plot image to match the width of the original image
+    # plot_image = cv2.resize(plot_image, (image.shape[1], math.ceil(image.shape[0] * 0.8)))  # Adjust height ratio as needed
+
+    # Combine the image and plot image vertically (stacking)
+    combined_image = np.vstack((image, cv2.resize(plot_image, (image.shape[1], plot_image.shape[0]))))
+
+    return combined_image
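+# A minimal usage sketch (hypothetical values):
+#
+#   preds = {'hello': 0.72, 'thanks': 0.18, 'yes': 0.10}
+#   out = draw_bar_plot_below_image(annotated, preds, 'Top-3 signs', frame)
+#   # out stacks the annotated frame on top of a bar chart of the same width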
+
+def add_padding_to_bottom(image, pad_value, pad_height):
+    """
+    Adds padding to the bottom of an image with a specified value.
+
+    Args:
+        image (numpy.ndarray): The input image.
+        pad_value (tuple or int): The color value to fill the padding area.
+        pad_height (int): The height of the padding to add at the bottom.
+
+    Returns:
+        numpy.ndarray: The image with padding added.
+    """
+    # Get image dimensions
+    height, width, channels = image.shape
+    # Build the padding block and broadcast the fill value across all channels
+    padding = np.zeros((pad_height, width, channels), dtype=image.dtype)
+    padding[:, :, :] = pad_value
+    return np.vstack((image, padding))
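+# Example (hypothetical values): pad a BGR frame with 50 rows at the bottom:
+#
+#   padded = add_padding_to_bottom(frame, 0, 50)            # black, all channels
+#   padded = add_padding_to_bottom(frame, (0, 0, 255), 50)  # red padding in BGR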
+
+def crop_to_drawing(image):
+    """
+    Crops an image to the tight bounding rectangle of non-zero pixels.
+
+    Args:
+        image: A NumPy array representing the image (H, W, C).
+
+    Returns:
+        A cropped image (NumPy array) containing only the drawing area.
+    """
+    # take the union of the per-channel bounding rectangles so that strokes
+    # drawn on any colour channel are retained
+    x0, y0 = image.shape[1], image.shape[0]
+    x1, y1 = 0, 0
+    for channel in np.transpose(image, (2, 0, 1)):
+        x, y, w, h = cv2.boundingRect(np.ascontiguousarray(channel))
+        if w == 0 or h == 0:
+            continue  # this channel is completely empty
+        x0, y0 = min(x0, x), min(y0, y)
+        x1, y1 = max(x1, x + w), max(y1, y + h)
+    if x1 <= x0 or y1 <= y0:
+        return image  # nothing drawn anywhere; return the image unchanged
+    # crop all channels with the same rectangle so the shape stays consistent
+    return image[y0:y1, x0:x1]
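+# The union rectangle is applied to the full (H, W, C) array in one slice so all
+# channels stay aligned. Minimal usage sketch (hypothetical name):
+#
+#   stick_only = crop_to_drawing(stick_canvas)  # trims the empty border away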
+
+# get the (row, column) index of the maximum value of a 2D array
+def npmax(array):
+    arrayindex = array.argmax(1)  # column of the max within each row
+    arrayvalue = array.max(1)     # max value within each row
+    i = arrayvalue.argmax()       # row holding the global max
+    j = arrayindex[i]             # column of the global max
+    return i, j
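+# Example: npmax(np.array([[1, 9, 2], [3, 0, 4]])) returns (0, 1),
+# since the largest value, 9, sits at row 0, column 1.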