Sunil Sarolkar committed on
Commit 9c63ccc · 0 Parent(s):

Fresh reset: clean Space without LFS history

Files changed (7)
  1. ISL_Model_parameter.py +662 -0
  2. app.py +653 -0
  3. expression_mapping.py +168 -0
  4. model.py +407 -0
  5. packages.txt +6 -0
  6. requirements.txt +22 -0
  7. util.py +463 -0
ISL_Model_parameter.py ADDED
@@ -0,0 +1,662 @@
+ import keras
+ from keras.layers import TorchModuleWrapper
+ import numpy as np
+ import cv2
+ import torch
+ from scipy.ndimage import gaussian_filter  # scipy.ndimage.filters was removed in SciPy 1.10
+ import math
+ import os
+ from skimage.measure import label
+ import util as util
+
+
+ class ISLSignPos(keras.Model):
+     def __init__(self, pt_body_model, pt_hand_model):
+         super().__init__()
+         # Wrap the pretrained PyTorch OpenPose nets so Keras (torch backend) can call them
+         self.pt_body = TorchModuleWrapper(pt_body_model)
+         self.pt_body.trainable = False
+         self.pt_hand = TorchModuleWrapper(pt_hand_model)
+         self.pt_hand.trainable = False
+         self.njoint_body = 26
+         self.npaf_body = 52
+
+     def call(self, oriImg):
+         candidate, subset = self.bodypos(oriImg.cpu().numpy())
+         hands_list = util.handDetect(candidate, subset, oriImg.cpu().numpy())
+         all_hand_peaks = []
+         for x, y, w, is_left in hands_list:
+             peaks = self.handpos(oriImg.cpu().numpy()[y:y+w, x:x+w, :])
+             # shift hand-crop coordinates back into full-image coordinates
+             peaks[:, 0] = np.where(peaks[:, 0] == 0, peaks[:, 0], peaks[:, 0] + x)
+             peaks[:, 1] = np.where(peaks[:, 1] == 0, peaks[:, 1], peaks[:, 1] + y)
+             all_hand_peaks.append(peaks)
+         return (candidate, subset, all_hand_peaks)
+
+     def bodypos(self, oriImg):
+         model_type = 'body25'
+         scale_search = [0.5]
+         boxsize = 368
+         stride = 8
+         padValue = 128
+         thre1 = 0.1
+         thre2 = 0.05
+         multiplier = [x * boxsize / oriImg.shape[0] for x in scale_search]
+         heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], self.njoint_body))
+         paf_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], self.npaf_body))
+
+         for m in range(len(multiplier)):
+             scale = multiplier[m]
+             imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
+             imageToTest_padded, pad = util.padRightDownCorner(imageToTest, stride, padValue)
+             im = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5
+             im = np.ascontiguousarray(im)
+
+             data = torch.from_numpy(im).float()
+             if torch.cuda.is_available():
+                 data = data.cuda()
+             with torch.no_grad():
+                 Mconv7_stage6_L1, Mconv7_stage6_L2 = self.pt_body(data)
+             Mconv7_stage6_L1 = Mconv7_stage6_L1.cpu().numpy()
+             Mconv7_stage6_L2 = Mconv7_stage6_L2.cpu().numpy()
+
+             heatmap = np.transpose(np.squeeze(Mconv7_stage6_L2), (1, 2, 0))  # output 1 is heatmaps
+             heatmap = cv2.resize(heatmap, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
+             heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
+             heatmap = cv2.resize(heatmap, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)
+
+             paf = np.transpose(np.squeeze(Mconv7_stage6_L1), (1, 2, 0))  # output 0 is PAFs
+             paf = cv2.resize(paf, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
+             paf = paf[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
+             paf = cv2.resize(paf, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)
+
+             # average across scales (the earlier `heatmap_avg += heatmap_avg + ...` double-counted the running sum)
+             heatmap_avg += heatmap / len(multiplier)
+             paf_avg += paf / len(multiplier)
+
+         all_peaks = []
+         peak_counter = 0
+
+         for part in range(self.njoint_body - 1):
+             map_ori = heatmap_avg[:, :, part]
+             one_heatmap = gaussian_filter(map_ori, sigma=3)
+
+             map_left = np.zeros(one_heatmap.shape)
+             map_left[1:, :] = one_heatmap[:-1, :]
+             map_right = np.zeros(one_heatmap.shape)
+             map_right[:-1, :] = one_heatmap[1:, :]
+             map_up = np.zeros(one_heatmap.shape)
+             map_up[:, 1:] = one_heatmap[:, :-1]
+             map_down = np.zeros(one_heatmap.shape)
+             map_down[:, :-1] = one_heatmap[:, 1:]
+
+             peaks_binary = np.logical_and.reduce(
+                 (one_heatmap >= map_left, one_heatmap >= map_right, one_heatmap >= map_up,
+                  one_heatmap >= map_down, one_heatmap > thre1))
+             peaks = list(zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0]))  # note reverse
+             peaks_with_score = [x + (map_ori[x[1], x[0]],) for x in peaks]
+             peak_id = range(peak_counter, peak_counter + len(peaks))
+             peaks_with_score_and_id = [peaks_with_score[i] + (peak_id[i],) for i in range(len(peak_id))]
+
+             all_peaks.append(peaks_with_score_and_id)
+             peak_counter += len(peaks)
+
+         if model_type == 'body25':
+             # find connections in the specified sequence
+             limbSeq = [[1, 0], [1, 2], [2, 3], [3, 4], [1, 5], [5, 6], [6, 7], [1, 8], [8, 9], [9, 10],
+                        [10, 11], [8, 12], [12, 13], [13, 14], [0, 15], [0, 16], [15, 17], [16, 18],
+                        [11, 24], [11, 22], [14, 21], [14, 19], [22, 23], [19, 20]]
+             # the middle joints' heatmap correspondence
+             mapIdx = [[30, 31], [14, 15], [16, 17], [18, 19], [22, 23], [24, 25], [26, 27], [0, 1], [6, 7],
+                       [2, 3], [4, 5], [8, 9], [10, 11], [12, 13], [32, 33], [34, 35], [36, 37], [38, 39],
+                       [50, 51], [46, 47], [44, 45], [40, 41], [48, 49], [42, 43]]
+         else:
+             # find connections in the specified sequence, center 29 is in position 15
+             limbSeq = [[1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7], [1, 8], [8, 9],
+                        [9, 10], [1, 11], [11, 12], [12, 13], [1, 0], [0, 14], [14, 16],
+                        [0, 15], [15, 17], [2, 16], [5, 17]]
+             # the middle joints' heatmap correspondence
+             mapIdx = [[12, 13], [20, 21], [14, 15], [16, 17], [22, 23], [24, 25], [0, 1], [2, 3],
+                       [4, 5], [6, 7], [8, 9], [10, 11], [28, 29], [30, 31], [34, 35], [32, 33],
+                       [36, 37], [18, 19], [26, 27]]
+
+         connection_all = []
+         special_k = []
+         mid_num = 10
+
+         for k in range(len(mapIdx)):
+             score_mid = paf_avg[:, :, mapIdx[k]]
+             candA = all_peaks[limbSeq[k][0]]
+             candB = all_peaks[limbSeq[k][1]]
+
+             nA = len(candA)
+             nB = len(candB)
+             indexA, indexB = limbSeq[k]
+             if nA != 0 and nB != 0:
+                 connection_candidate = []
+                 for i in range(nA):
+                     for j in range(nB):
+                         vec = np.subtract(candB[j][:2], candA[i][:2])
+                         norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1])
+                         norm = max(0.001, norm)
+                         vec = np.divide(vec, norm)
+
+                         startend = list(zip(np.linspace(candA[i][0], candB[j][0], num=mid_num),
+                                             np.linspace(candA[i][1], candB[j][1], num=mid_num)))
+
+                         vec_x = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 0]
+                                           for I in range(len(startend))])
+                         vec_y = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 1]
+                                           for I in range(len(startend))])
+
+                         score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(vec_y, vec[1])
+                         score_with_dist_prior = sum(score_midpts) / len(score_midpts) + min(
+                             0.5 * oriImg.shape[0] / norm - 1, 0)
+                         criterion1 = len(np.nonzero(score_midpts > thre2)[0]) > 0.8 * len(score_midpts)
+                         criterion2 = score_with_dist_prior > 0
+                         if criterion1 and criterion2:
+                             connection_candidate.append(
+                                 [i, j, score_with_dist_prior, score_with_dist_prior + candA[i][2] + candB[j][2]])
+
+                 connection_candidate = sorted(connection_candidate, key=lambda x: x[2], reverse=True)
+                 connection = np.zeros((0, 5))
+                 for c in range(len(connection_candidate)):
+                     i, j, s = connection_candidate[c][0:3]
+                     if i not in connection[:, 3] and j not in connection[:, 4]:
+                         connection = np.vstack([connection, [candA[i][3], candB[j][3], s, i, j]])
+                         if len(connection) >= min(nA, nB):
+                             break
+
+                 connection_all.append(connection)
+             else:
+                 special_k.append(k)
+                 connection_all.append([])
+
+         # last number in each row is the total parts number of that person
+         # the second-to-last number in each row is the score of the overall configuration
+         subset = -1 * np.ones((0, self.njoint_body + 1))
+         candidate = np.array([item for sublist in all_peaks for item in sublist])
+
+         for k in range(len(mapIdx)):
+             if k not in special_k:
+                 partAs = connection_all[k][:, 0]
+                 partBs = connection_all[k][:, 1]
+                 indexA, indexB = np.array(limbSeq[k])
+
+                 for i in range(len(connection_all[k])):
+                     found = 0
+                     subset_idx = [-1, -1]
+                     for j in range(len(subset)):
+                         if subset[j][indexA] == partAs[i] or subset[j][indexB] == partBs[i]:
+                             subset_idx[found] = j
+                             found += 1
+
+                     if found == 1:
+                         j = subset_idx[0]
+                         if subset[j][indexB] != partBs[i]:
+                             subset[j][indexB] = partBs[i]
+                             subset[j][-1] += 1
+                             subset[j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
+                     elif found == 2:  # if found 2 and disjoint, merge them
+                         j1, j2 = subset_idx
+                         membership = ((subset[j1] >= 0).astype(int) + (subset[j2] >= 0).astype(int))[:-2]
+                         if len(np.nonzero(membership == 2)[0]) == 0:  # merge
+                             subset[j1][:-2] += (subset[j2][:-2] + 1)
+                             subset[j1][-2:] += subset[j2][-2:]
+                             subset[j1][-2] += connection_all[k][i][2]
+                             subset = np.delete(subset, j2, 0)
+                         else:  # same handling as found == 1
+                             subset[j1][indexB] = partBs[i]
+                             subset[j1][-1] += 1
+                             subset[j1][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
+
+                     # if no partA is found in the subset, create a new row
+                     elif not found and k < self.njoint_body - 2:
+                         row = -1 * np.ones(self.njoint_body + 1)
+                         row[indexA] = partAs[i]
+                         row[indexB] = partBs[i]
+                         row[-1] = 2
+                         row[-2] = sum(candidate[connection_all[k][i, :2].astype(int), 2]) + connection_all[k][i][2]
+                         subset = np.vstack([subset, row])
+         # delete rows of subset in which too few parts occur
+         deleteIdx = []
+         for i in range(len(subset)):
+             if subset[i][-1] < 4 or subset[i][-2] / subset[i][-1] < 0.4:
+                 deleteIdx.append(i)
+         subset = np.delete(subset, deleteIdx, axis=0)
+
+         # subset: one row per person - candidate indices per joint, then total score, then part count
+         # candidate: x, y, score, id
+         return candidate, subset
+
+     def handpos(self, oriImg):
+         scale_search = [0.5, 1.0, 1.5, 2.0]
+         boxsize = 368
+         stride = 8
+         padValue = 128
+         thre = 0.05
+         multiplier = [x * boxsize / oriImg.shape[0] for x in scale_search]
+         heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 22))
+
+         for m in range(len(multiplier)):
+             scale = multiplier[m]
+             imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
+             imageToTest_padded, pad = util.padRightDownCorner(imageToTest, stride, padValue)
+             im = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5
+             im = np.ascontiguousarray(im)
+
+             data = torch.from_numpy(im).float()
+             if torch.cuda.is_available():
+                 data = data.cuda()
+             with torch.no_grad():
+                 output = self.pt_hand(data).cpu().numpy()
+
+             # extract outputs, resize, and remove padding
+             heatmap = np.transpose(np.squeeze(output), (1, 2, 0))
+             heatmap = cv2.resize(heatmap, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
+             heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
+             heatmap = cv2.resize(heatmap, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)
+
+             heatmap_avg += heatmap / len(multiplier)
+
+         all_peaks = []
+         for part in range(21):
+             map_ori = heatmap_avg[:, :, part]
+             one_heatmap = gaussian_filter(map_ori, sigma=3)
+             binary = np.ascontiguousarray(one_heatmap > thre, dtype=np.uint8)
+             # all values below the threshold
+             if np.sum(binary) == 0:
+                 all_peaks.append([0, 0])
+                 continue
+             label_img, label_numbers = label(binary, return_num=True, connectivity=binary.ndim)
+             max_index = np.argmax([np.sum(map_ori[label_img == i]) for i in range(1, label_numbers + 1)]) + 1
+             label_img[label_img != max_index] = 0
+             map_ori[label_img == 0] = 0
+
+             y, x = util.npmax(map_ori)
+             all_peaks.append([x, y])
+         return np.array(all_peaks)
+
+
+ class ISLSignPosTranslator(keras.Model):
+
+     def __init__(self, body_model, hand_model, translation_model):
+         super().__init__()
+         self.pt_body = TorchModuleWrapper(body_model)
+         self.pt_body.trainable = False
+         self.pt_hand = TorchModuleWrapper(hand_model)
+         self.pt_hand.trainable = False
+
+         self.njoint_body = 26
+         self.npaf_body = 52
+         self.model_type = 'body25'
+         self.translation_layer = translation_model
+
+     def call(self, window):
+         window_size = 20
+         window_features = []
+         blank_frame = np.zeros(156)  # zero-padding feature vector for short windows
+         for idx, frame in enumerate(window.cpu()):
+             candidate, subset = self.bodypos(frame.cpu().numpy())
+             hands_list = util.handDetect(candidate, subset, frame.cpu().numpy())
+             all_hand_peaks = []
+             for x, y, w, is_left in hands_list:
+                 peaks = self.handpos(frame.cpu().numpy()[y:y+w, x:x+w, :])
+                 peaks[:, 0] = np.where(peaks[:, 0] == 0, peaks[:, 0], peaks[:, 0] + x)
+                 peaks[:, 1] = np.where(peaks[:, 1] == 0, peaks[:, 1], peaks[:, 1] + y)
+                 all_hand_peaks.append(peaks)
+
+             (bodypose_circles, bodypose_sticks) = util.get_bodypose(candidate, subset, self.model_type)
+             (handpose_edges, handpose_peaks) = util.get_handpose(all_hand_peaks)
+
+             feature = self.populate_features(bodypose_circles, handpose_peaks)
+             window_features.append(feature)
+
+         # pad short windows with blank frames (window_features is a list, so use len())
+         if len(window_features) < window_size:
+             for _ in range(window_size - len(window_features)):
+                 window_features.append(blank_frame)
+
+         return self.translation_layer(np.array(window_features).reshape(1, window_size, 156))
+
+     def frame_to_window(self, frame):
+         """
+         Adds a single frame to the rolling window, dropping the oldest frame.
+
+         Args:
+             frame: A numpy array of features for one video frame.
+         """
+         # Shift the window elements by 1 (the oldest frame is dropped)
+         self.window[:-1] = self.window[1:]
+         # Add the new frame to the end of the window
+         self.window[-1] = frame
+
+     def populate_features(self, bodypose_circles, handpose_peaks):
+         # Feature layout (156 values): 15 body x + 15 body y, then per hand
+         # (2 hands): 21 x + 21 y + 21 peak scores. Missing joints/hands are zero.
+         feature = []
+         for idx in range(15):
+             if idx < len(bodypose_circles):
+                 feature.append(bodypose_circles[idx][0])
+             else:
+                 feature.append(0)
+
+         for idx in range(15):
+             if idx < len(bodypose_circles):
+                 feature.append(bodypose_circles[idx][1])
+             else:
+                 feature.append(0)
+
+         for hand_idx in range(2):
+             # guard against fewer than two detected hands
+             peaks = handpose_peaks[hand_idx] if hand_idx < len(handpose_peaks) else []
+             for idx in range(21):
+                 feature.append(float(peaks[idx][0]) if idx < len(peaks) else 0)
+             for idx in range(21):
+                 feature.append(float(peaks[idx][1]) if idx < len(peaks) else 0)
+             for idx in range(21):
+                 feature.append(float(peaks[idx][2]) if idx < len(peaks) else 0)
+
+         return np.array(feature)
+
+     def bodypos(self, oriImg):
+         model_type = 'body25'
+         scale_search = [0.5]
+         boxsize = 368
+         stride = 8
+         padValue = 128
+         thre1 = 0.1
+         thre2 = 0.05
+         multiplier = [x * boxsize / oriImg.shape[0] for x in scale_search]
+         heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], self.njoint_body))
+         paf_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], self.npaf_body))
+
+         for m in range(len(multiplier)):
+             scale = multiplier[m]
+             imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
+             imageToTest_padded, pad = util.padRightDownCorner(imageToTest, stride, padValue)
+             im = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5
+             im = np.ascontiguousarray(im)
+
+             data = torch.from_numpy(im).float()
+             if torch.cuda.is_available():
+                 data = data.cuda()
+             with torch.no_grad():
+                 Mconv7_stage6_L1, Mconv7_stage6_L2 = self.pt_body(data)
+             Mconv7_stage6_L1 = Mconv7_stage6_L1.cpu().numpy()
+             Mconv7_stage6_L2 = Mconv7_stage6_L2.cpu().numpy()
+
+             heatmap = np.transpose(np.squeeze(Mconv7_stage6_L2), (1, 2, 0))  # output 1 is heatmaps
+             heatmap = cv2.resize(heatmap, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
+             heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
+             heatmap = cv2.resize(heatmap, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)
+
+             paf = np.transpose(np.squeeze(Mconv7_stage6_L1), (1, 2, 0))  # output 0 is PAFs
+             paf = cv2.resize(paf, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
+             paf = paf[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
+             paf = cv2.resize(paf, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)
+
+             # average across scales (the earlier `heatmap_avg += heatmap_avg + ...` double-counted the running sum)
+             heatmap_avg += heatmap / len(multiplier)
+             paf_avg += paf / len(multiplier)
+
+         all_peaks = []
+         peak_counter = 0
+
+         for part in range(self.njoint_body - 1):
+             map_ori = heatmap_avg[:, :, part]
+             one_heatmap = gaussian_filter(map_ori, sigma=3)
+
+             map_left = np.zeros(one_heatmap.shape)
+             map_left[1:, :] = one_heatmap[:-1, :]
+             map_right = np.zeros(one_heatmap.shape)
+             map_right[:-1, :] = one_heatmap[1:, :]
+             map_up = np.zeros(one_heatmap.shape)
+             map_up[:, 1:] = one_heatmap[:, :-1]
+             map_down = np.zeros(one_heatmap.shape)
+             map_down[:, :-1] = one_heatmap[:, 1:]
+
+             peaks_binary = np.logical_and.reduce(
+                 (one_heatmap >= map_left, one_heatmap >= map_right, one_heatmap >= map_up,
+                  one_heatmap >= map_down, one_heatmap > thre1))
+             peaks = list(zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0]))  # note reverse
+             peaks_with_score = [x + (map_ori[x[1], x[0]],) for x in peaks]
+             peak_id = range(peak_counter, peak_counter + len(peaks))
+             peaks_with_score_and_id = [peaks_with_score[i] + (peak_id[i],) for i in range(len(peak_id))]
+
+             all_peaks.append(peaks_with_score_and_id)
+             peak_counter += len(peaks)
+
+         if model_type == 'body25':
+             # find connections in the specified sequence
+             limbSeq = [[1, 0], [1, 2], [2, 3], [3, 4], [1, 5], [5, 6], [6, 7], [1, 8], [8, 9], [9, 10],
+                        [10, 11], [8, 12], [12, 13], [13, 14], [0, 15], [0, 16], [15, 17], [16, 18],
+                        [11, 24], [11, 22], [14, 21], [14, 19], [22, 23], [19, 20]]
+             # the middle joints' heatmap correspondence
+             mapIdx = [[30, 31], [14, 15], [16, 17], [18, 19], [22, 23], [24, 25], [26, 27], [0, 1], [6, 7],
+                       [2, 3], [4, 5], [8, 9], [10, 11], [12, 13], [32, 33], [34, 35], [36, 37], [38, 39],
+                       [50, 51], [46, 47], [44, 45], [40, 41], [48, 49], [42, 43]]
+         else:
+             # find connections in the specified sequence, center 29 is in position 15
+             limbSeq = [[1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7], [1, 8], [8, 9],
+                        [9, 10], [1, 11], [11, 12], [12, 13], [1, 0], [0, 14], [14, 16],
+                        [0, 15], [15, 17], [2, 16], [5, 17]]
+             # the middle joints' heatmap correspondence
+             mapIdx = [[12, 13], [20, 21], [14, 15], [16, 17], [22, 23], [24, 25], [0, 1], [2, 3],
+                       [4, 5], [6, 7], [8, 9], [10, 11], [28, 29], [30, 31], [34, 35], [32, 33],
+                       [36, 37], [18, 19], [26, 27]]
+
+         connection_all = []
+         special_k = []
+         mid_num = 10
+
+         for k in range(len(mapIdx)):
+             score_mid = paf_avg[:, :, mapIdx[k]]
+             candA = all_peaks[limbSeq[k][0]]
+             candB = all_peaks[limbSeq[k][1]]
+
+             nA = len(candA)
+             nB = len(candB)
+             indexA, indexB = limbSeq[k]
+             if nA != 0 and nB != 0:
+                 connection_candidate = []
+                 for i in range(nA):
+                     for j in range(nB):
+                         vec = np.subtract(candB[j][:2], candA[i][:2])
+                         norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1])
+                         norm = max(0.001, norm)
+                         vec = np.divide(vec, norm)
+
+                         startend = list(zip(np.linspace(candA[i][0], candB[j][0], num=mid_num),
+                                             np.linspace(candA[i][1], candB[j][1], num=mid_num)))
+
+                         vec_x = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 0]
+                                           for I in range(len(startend))])
+                         vec_y = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 1]
+                                           for I in range(len(startend))])
+
+                         score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(vec_y, vec[1])
+                         score_with_dist_prior = sum(score_midpts) / len(score_midpts) + min(
+                             0.5 * oriImg.shape[0] / norm - 1, 0)
+                         criterion1 = len(np.nonzero(score_midpts > thre2)[0]) > 0.8 * len(score_midpts)
+                         criterion2 = score_with_dist_prior > 0
+                         if criterion1 and criterion2:
+                             connection_candidate.append(
+                                 [i, j, score_with_dist_prior, score_with_dist_prior + candA[i][2] + candB[j][2]])
+
+                 connection_candidate = sorted(connection_candidate, key=lambda x: x[2], reverse=True)
+                 connection = np.zeros((0, 5))
+                 for c in range(len(connection_candidate)):
+                     i, j, s = connection_candidate[c][0:3]
+                     if i not in connection[:, 3] and j not in connection[:, 4]:
+                         connection = np.vstack([connection, [candA[i][3], candB[j][3], s, i, j]])
+                         if len(connection) >= min(nA, nB):
+                             break
+
+                 connection_all.append(connection)
+             else:
+                 special_k.append(k)
+                 connection_all.append([])
+
+         # last number in each row is the total parts number of that person
+         # the second-to-last number in each row is the score of the overall configuration
+         subset = -1 * np.ones((0, self.njoint_body + 1))
+         candidate = np.array([item for sublist in all_peaks for item in sublist])
+
+         for k in range(len(mapIdx)):
+             if k not in special_k:
+                 partAs = connection_all[k][:, 0]
+                 partBs = connection_all[k][:, 1]
+                 indexA, indexB = np.array(limbSeq[k])
+
+                 for i in range(len(connection_all[k])):
+                     found = 0
+                     subset_idx = [-1, -1]
+                     for j in range(len(subset)):
+                         if subset[j][indexA] == partAs[i] or subset[j][indexB] == partBs[i]:
+                             subset_idx[found] = j
+                             found += 1
+
+                     if found == 1:
+                         j = subset_idx[0]
+                         if subset[j][indexB] != partBs[i]:
+                             subset[j][indexB] = partBs[i]
+                             subset[j][-1] += 1
+                             subset[j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
+                     elif found == 2:  # if found 2 and disjoint, merge them
+                         j1, j2 = subset_idx
+                         membership = ((subset[j1] >= 0).astype(int) + (subset[j2] >= 0).astype(int))[:-2]
+                         if len(np.nonzero(membership == 2)[0]) == 0:  # merge
+                             subset[j1][:-2] += (subset[j2][:-2] + 1)
+                             subset[j1][-2:] += subset[j2][-2:]
+                             subset[j1][-2] += connection_all[k][i][2]
+                             subset = np.delete(subset, j2, 0)
+                         else:  # same handling as found == 1
+                             subset[j1][indexB] = partBs[i]
+                             subset[j1][-1] += 1
+                             subset[j1][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
+
+                     # if no partA is found in the subset, create a new row
+                     elif not found and k < self.njoint_body - 2:
+                         row = -1 * np.ones(self.njoint_body + 1)
+                         row[indexA] = partAs[i]
+                         row[indexB] = partBs[i]
+                         row[-1] = 2
+                         row[-2] = sum(candidate[connection_all[k][i, :2].astype(int), 2]) + connection_all[k][i][2]
+                         subset = np.vstack([subset, row])
+         # delete rows of subset in which too few parts occur
+         deleteIdx = []
+         for i in range(len(subset)):
+             if subset[i][-1] < 4 or subset[i][-2] / subset[i][-1] < 0.4:
+                 deleteIdx.append(i)
+         subset = np.delete(subset, deleteIdx, axis=0)
+
+         # subset: one row per person - candidate indices per joint, then total score, then part count
+         # candidate: x, y, score, id
+         return candidate, subset
+
+     def handpos(self, oriImg):
+         scale_search = [0.5, 1.0, 1.5, 2.0]
+         boxsize = 368
+         stride = 8
+         padValue = 128
+         thre = 0.05
+         multiplier = [x * boxsize / oriImg.shape[0] for x in scale_search]
+         heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 22))
+
+         for m in range(len(multiplier)):
+             scale = multiplier[m]
+             imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
+             imageToTest_padded, pad = util.padRightDownCorner(imageToTest, stride, padValue)
+             im = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5
+             im = np.ascontiguousarray(im)
+
+             data = torch.from_numpy(im).float()
+             if torch.cuda.is_available():
+                 data = data.cuda()
+             with torch.no_grad():
+                 output = self.pt_hand(data).cpu().numpy()
+
+             # extract outputs, resize, and remove padding
+             heatmap = np.transpose(np.squeeze(output), (1, 2, 0))
+             heatmap = cv2.resize(heatmap, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
+             heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
+             heatmap = cv2.resize(heatmap, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)
+
+             heatmap_avg += heatmap / len(multiplier)
+
+         all_peaks = []
+         for part in range(21):
+             map_ori = heatmap_avg[:, :, part]
+             one_heatmap = gaussian_filter(map_ori, sigma=3)
+             binary = np.ascontiguousarray(one_heatmap > thre, dtype=np.uint8)
+             # all values below the threshold
+             if np.sum(binary) == 0:
+                 all_peaks.append([0, 0])
+                 continue
+             label_img, label_numbers = label(binary, return_num=True, connectivity=binary.ndim)
+             max_index = np.argmax([np.sum(map_ori[label_img == i]) for i in range(1, label_numbers + 1)]) + 1
+             label_img[label_img != max_index] = 0
+             map_ori[label_img == 0] = 0
+
+             y, x = util.npmax(map_ori)
+             all_peaks.append([x, y])
+         return np.array(all_peaks)
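A minimal usage sketch (not part of the commit) of how these pieces fit together. It assumes `bodypose_25_model` and `handpose_model` from this Space's model.py (their pretrained OpenPose weights would still need to be loaded separately), rebuilds the LSTM head exactly as app.py's `get_translator_model()` does, and feeds a dummy 20-frame clip; the shapes and file name come from app.py.

```python
import os
os.environ["KERAS_BACKEND"] = "torch"  # TorchModuleWrapper requires the torch backend

import torch
from keras.models import Sequential
from keras.layers import (Input, Masking, BatchNormalization, Bidirectional,
                          LSTM, Dropout, Activation, Dense)

from model import bodypose_25_model, handpose_model      # defined in model.py
from expression_mapping import expression_mapping
from ISL_Model_parameter import ISLSignPosTranslator

# Same head as app.py's get_translator_model(); weights file name from app.py.
head = Sequential([
    Input(shape=(20, 156)),
    Masking(mask_value=0.),
    BatchNormalization(),
    Bidirectional(LSTM(32, recurrent_dropout=0.2, return_sequences=True)),
    Dropout(0.2),
    Bidirectional(LSTM(32, recurrent_dropout=0.2)),
    Activation('elu'),
    Dense(32, use_bias=False, kernel_initializer='he_normal'),
    BatchNormalization(),
    Dropout(0.2),
    Activation('elu'),
    Dense(32, kernel_initializer='he_normal', use_bias=False),
    BatchNormalization(),
    Activation('elu'),
    Dropout(0.2),
    Dense(len(expression_mapping), activation='softmax'),
])
head.load_weights('isl_model_final.keras')

translator = ISLSignPosTranslator(bodypose_25_model(), handpose_model(), head)
frames = torch.zeros((20, 1080, 1920, 3), dtype=torch.uint8)  # 20 BGR frames, 1920x1080 as in INCLUDE
probs = translator(frames)  # (1, len(expression_mapping)) softmax scores
```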
app.py ADDED
@@ -0,0 +1,653 @@
+ # Modified by Augmented Startups 2021
+ # Face Landmark User Interface with StreamLit
+ # Watch Computer Vision Tutorials at www.augmentedstartups.info/YouTube
+ import streamlit as st
+ st.write("Booting…")
+ import os
+ os.environ["KERAS_BACKEND"] = "torch"
+ import keras
+
+ import cv2
+ import numpy as np
+ import pandas as pd
+ import tempfile
+ import time
+ import pickle
+ import json
+ import shutil
+ import platform
+ import subprocess
+ from typing import NamedTuple
+ from PIL import Image
+ from keras.models import Sequential
+ from keras.layers import LSTM, Dense, Bidirectional, Dropout, Input, BatchNormalization
+ from model import handpose_model, bodypose_25_model
+ from expression_mapping import expression_mapping
+ from ISL_Model_parameter import ISLSignPosTranslator
+ import ffmpeg
+ import util
+ from huggingface_hub import hf_hub_download
+
+ st.write("Python:", platform.python_version())
+ st.write("FFmpeg exists:", shutil.which("ffmpeg"), "FFprobe:", shutil.which("ffprobe"))
+
+ try:
+     import cv2
+     st.write("OpenCV version:", cv2.__version__)
+ except Exception as e:
+     st.error(f"OpenCV import failed: {e}")
+
+ try:
+     import torch, keras
+     st.write("Torch:", torch.__version__, "Keras:", keras.__version__)
+ except Exception as e:
+     st.error(f"Torch/Keras import failed: {e}")
+
+
+ class FFProbeResult(NamedTuple):
+     return_code: int
+     json: str
+     error: str
+
+
+ def ffprobe(file_path) -> FFProbeResult:
+     command_array = ["ffprobe",
+                      "-v", "quiet",
+                      "-print_format", "json",
+                      "-show_format",
+                      "-show_streams",
+                      file_path]
+     result = subprocess.run(command_array, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
+     return FFProbeResult(return_code=result.returncode,
+                          json=result.stdout,
+                          error=result.stderr)
+
+
+ X_body_test = [f'bodypeaks_x_{i}' for i in range(15)] + [f'bodypeaks_y_{i}' for i in range(15)]
+ X_hand0_test = [f'hand0peaks_x_{i}' for i in range(21)] + [f'hand0peaks_y_{i}' for i in range(21)] + [f'hand0peaks_peaktxt{i}' for i in range(21)]
+ X_hand1_test = [f'hand1peaks_x_{i}' for i in range(21)] + [f'hand1peaks_y_{i}' for i in range(21)] + [f'hand1peaks_peaktxt{i}' for i in range(21)]
+
+ feature_columns_new = X_body_test + X_hand0_test + X_hand1_test
+ label_columns = ['Expression_encoded']
+
+ @st.cache_resource
+ def create_timeseries_data(isl_data, feature_columns, label_columns, window_size=20):
+     """
+     Creates timeseries windows from a DataFrame, zero-padding windows that
+     are shorter than window_size.
+
+     Args:
+         isl_data (pandas.DataFrame): The input DataFrame.
+         feature_columns (list): Names of the feature columns.
+         label_columns (list): Names of the label columns.
+         window_size (int, optional): The window size. Defaults to 20.
+
+     Returns:
+         tuple: (X, y) where X is a list of windows and y the matching labels.
+     """
+     # Handle an empty DataFrame
+     if isl_data.empty:
+         return [], []
+
+     X = []
+     y = []
+     for group, file_df in isl_data.groupby(['Type', 'Expression_encoded', 'FileName']):
+         expr_types, exprs, filepaths = group
+         first_frame = np.zeros((1, 156))
+         # Slide a window_size-frame window over the file, one frame at a time
+         for idx, x in enumerate([file_df[i:i + window_size] for i in range(0, file_df.shape[0], 1)]):
+             if x.shape[0] < window_size:
+                 # Pad short windows at the front with blank frames
+                 X.append(np.concatenate((np.repeat(first_frame, (window_size - x.shape[0]), axis=0),
+                                          x[feature_columns].values), axis=0))
+                 y.append(exprs)
+                 continue
+
+             X.append(x[feature_columns].values)
+             y.append(exprs)
+
+     return X, y
+
+
+ translation_model = None
+
+ @st.cache_resource
+ def get_translator_model():
+     translation_model = Sequential()
+     translation_model.add(Input(shape=(20, 156)))
+     translation_model.add(keras.layers.Masking(mask_value=0.))
+     translation_model.add(BatchNormalization())
+     translation_model.add(Bidirectional(LSTM(32, recurrent_dropout=0.2, return_sequences=True)))
+
+     translation_model.add(Dropout(0.2))
+     translation_model.add(Bidirectional(LSTM(32, recurrent_dropout=0.2)))
+
+     translation_model.add(keras.layers.Activation('elu'))
+     translation_model.add(Dense(32, use_bias=False, kernel_initializer='he_normal'))
+
+     translation_model.add(BatchNormalization())
+     translation_model.add(Dropout(0.2))
+     translation_model.add(keras.layers.Activation('elu'))
+     translation_model.add(Dense(32, kernel_initializer='he_normal', use_bias=False))
+
+     translation_model.add(BatchNormalization())
+     translation_model.add(keras.layers.Activation('elu'))
+     translation_model.add(Dropout(0.2))
+     translation_model.add(Dense(len(expression_mapping), activation='softmax'))
+     translation_model.load_weights('isl_model_final.keras')
+     return translation_model
+
+
+ testing_cleaned = hf_hub_download(
+     repo_id="sunilsarolkar/isl-test-data",
+     filename="testing_cleaned.csv",
+     repo_type="dataset"
+ )
+ testing_df = pd.read_csv(testing_cleaned)
+
+ test_files = hf_hub_download(
+     repo_id="sunilsarolkar/isl-test-data",
+     filename="test_files.csv",
+     repo_type="dataset"
+ )
+ test_files_df = pd.read_csv(test_files)
+
+
+ class Writer():
+     def __init__(self, output_file, input_fps, input_framesize, input_pix_fmt,
+                  input_vcodec):
+         # Stream raw BGR frames into an ffmpeg encoder process
+         self.ff_proc = (
+             ffmpeg
+             .input('pipe:',
+                    format='rawvideo',
+                    pix_fmt="bgr24",
+                    s='%sx%s' % (input_framesize[1], input_framesize[0]),
+                    r=input_fps)
+             .output(output_file, pix_fmt=input_pix_fmt, vcodec=input_vcodec)
+             .overwrite_output()
+             .run_async(pipe_stdin=True)
+         )
+
+     def __call__(self, frame):
+         self.ff_proc.stdin.write(frame.tobytes())
+
+     def close(self):
+         self.ff_proc.stdin.close()
+         self.ff_proc.wait()
+
+
+ st.title('ISL Indian Sign Language translation using LSTM')
+
+ st.markdown(
+     """
+     <style>
+     [data-testid="stSidebar"][aria-expanded="true"] > div:first-child {
+         width: 350px;
+     }
+     [data-testid="stSidebar"][aria-expanded="false"] > div:first-child {
+         width: 350px;
+         margin-left: -350px;
+     }
+     </style>
+     """,
+     unsafe_allow_html=True,
+ )
+
+ st.sidebar.title('ISL Sign Language Translation using Openpose')
+ st.sidebar.subheader('Parameters')
+ frame_wise_outputs = {}
+
+ def weighted_average(nums, weights):
+     if sum(weights) == 0:
+         return 0
+     return sum(x * y for x, y in zip(nums, weights)) / sum(weights)
+
+
+ @st.cache_data
+ def image_resize(image, width=None, height=None, inter=cv2.INTER_AREA):
+     # initialize the dimensions of the resized image and grab the image size
+     dim = None
+     (h, w) = image.shape[:2]
+
+     # if both width and height are None, return the original image
+     if width is None and height is None:
+         return image
+
+     if width is None:
+         # calculate the ratio of the height and construct the dimensions
+         r = height / float(h)
+         dim = (int(w * r), height)
+     else:
+         # calculate the ratio of the width and construct the dimensions
+         r = width / float(w)
+         dim = (width, int(h * r))
+
+     # resize the image and return it
+     return cv2.resize(image, dim, interpolation=inter)
+
+
+ app_mode = st.sidebar.selectbox('Choose the App mode',
+                                 ['About App', 'Run on Test Videos'])
+
+ if app_mode == 'About App':
+     st.markdown('In this application we demonstrate a model developed for translating Indian Sign Language (ISL) using an LSTM.')
+     st.markdown(
+         """
+         <style>
+         [data-testid="stSidebar"][aria-expanded="true"] > div:first-child {
+             width: 400px;
+         }
+         [data-testid="stSidebar"][aria-expanded="false"] > div:first-child {
+             width: 400px;
+             margin-left: -400px;
+         }
+         </style>
+         """,
+         unsafe_allow_html=True,
+     )
+
+     st.markdown('''
+ # Dataset Used \n
+ This model is trained using the [INCLUDE](https://zenodo.org/records/4010759) dataset. \n
+
+ ### Key statistics for the dataset are as follows:
+
+     +-----------------------+-----------------+
+     | Characteristic        | INCLUDE-DATASET |
+     +-----------------------+-----------------+
+     | Categories            | 15              |
+     | Words                 | 263             |
+     | Videos                | 4287            |
+     | Avg Videos per class  | 16.3            |
+     | Avg Video Length      | 2.57s           |
+     | Min Video Length      | 1.28s           |
+     | Max Video Length      | 6.16s           |
+     | Frame Rate            | 25fps           |
+     | Resolution            | 1920x1080       |
+     +-----------------------+-----------------+
+
+ #### Size of each category
+
+     +--------------------+-------------------+------------------+
+     | Category           | Number of Classes | Number of Videos |
+     +--------------------+-------------------+------------------+
+     | Adjectives         | 59                | 791              |
+     | Animals            | 8                 | 166              |
+     | Clothes            | 10                | 198              |
+     | Colours            | 11                | 222              |
+     | Days and Time      | 22                | 306              |
+     | Electronics        | 10                | 140              |
+     | Greetings          | 9                 | 185              |
+     | Means of Transport | 9                 | 186              |
+     | Objects at Home    | 27                | 379              |
+     | Occupations        | 16                | 225              |
+     | People             | 26                | 513              |
+     | Places             | 19                | 399              |
+     | Pronouns           | 8                 | 168              |
+     | Seasons            | 6                 | 85               |
+     | Society            | 23                | 324              |
+     | Total              | 263               | 4287             |
+     +--------------------+-------------------+------------------+
+
+ Below is the count of videos we were able to process (1986 of 4287); we processed a limited set of records due to time/compute constraints.
+ ''')
+
+     image = np.array(Image.open('eda/categories_processed.png'))
+     st.image(image)
+     st.markdown('''
+ #### Count of videos per label for each DataFrame
+ ''')
+     image = np.array(Image.open('eda/distribution_of_data.png'))
+     st.image(image)
+
+     st.markdown('''
+ ### Data Pipeline
+ ''')
+     image = np.array(Image.open('DataPipeline.png'))
+     st.image(image)
+     st.markdown('''
+ ### Model structure
+ ```
+ translation_model = Sequential()
+ translation_model.add(Input(shape=(20, 156)))
+ translation_model.add(keras.layers.Masking(mask_value=0.))
+ translation_model.add(BatchNormalization())
+ translation_model.add(Bidirectional(LSTM(32, recurrent_dropout=0.2, return_sequences=True)))
+
+ translation_model.add(Dropout(0.2))
+ translation_model.add(Bidirectional(LSTM(32, recurrent_dropout=0.2)))
+
+ translation_model.add(keras.layers.Activation('elu'))
+ translation_model.add(Dense(32, use_bias=False, kernel_initializer='he_normal'))
+
+ translation_model.add(BatchNormalization())
+ translation_model.add(Dropout(0.2))
+ translation_model.add(keras.layers.Activation('elu'))
+ translation_model.add(Dense(32, kernel_initializer='he_normal', use_bias=False))
+
+ translation_model.add(BatchNormalization())
+ translation_model.add(keras.layers.Activation('elu'))
+ translation_model.add(Dropout(0.2))
+ translation_model.add(Dense(len(list(expression_mapping.keys())), activation='softmax'))
+ isl_translator = ISLSignPosTranslator(bodypose_25_model(), handpose_model(), translation_model)
+ ```
+
+ Total params: 82,679 (322.96 KB)
+ Trainable params: 82,239 (321.25 KB)
+ Non-trainable params: 440 (1.72 KB)
+ ''')
+     image = np.array(Image.open('model-graph.png'))
+     st.image(image)
+     st.markdown('''
+ # Training
+ [Tensorboard](https://huggingface.co/cdsteameight/ISL-SignLanguageTranslation/tensorboard)
+ ''')
+
+ elif app_mode == 'Run on Test Videos':
+     category = st.sidebar.selectbox('Choose Category',
+                                     np.sort(test_files_df['Category'].unique(), axis=-1, kind='mergesort'))
+     mask = (test_files_df['Category'] == category)
+     test_files_df_category = test_files_df[mask]
+     cls = st.sidebar.selectbox('Choose Class',
+                                np.sort(test_files_df_category['Class'].unique(), axis=-1, kind='mergesort'))
+     mask = (test_files_df['Class'] == cls)
+     filename = st.sidebar.selectbox('Choose File',
+                                     np.sort(test_files_df_category[mask]['Filename'].unique(), axis=-1, kind='mergesort'))
+
+     if st.sidebar.button("Start", type="primary"):
+         mask = (testing_df['FileName'] == filename) & (testing_df['Type'] == category) & (testing_df['Expression'] == cls)
+
+         window_size = 20
+         current_test_df = testing_df[mask]
+         X_test_filtered, y_test_filtered = create_timeseries_data(current_test_df, feature_columns_new, label_columns, window_size=window_size)
+         X_test_filtered = np.array(X_test_filtered)
+
+         st.sidebar.markdown('---')
+         st.markdown(
+             """
+             <style>
+             [data-testid="stSidebar"][aria-expanded="true"] > div:first-child {
+                 width: 400px;
+             }
+             [data-testid="stSidebar"][aria-expanded="false"] > div:first-child {
+                 width: 400px;
+                 margin-left: -400px;
+             }
+             </style>
+             """,
+             unsafe_allow_html=True,
+         )
+
+         st.sidebar.markdown('---')
+
+         st.markdown(' ## Output')
+
+         runtime_progress = st.empty()
+
+         with runtime_progress.container():
+             df1 = pd.DataFrame([['--', '--']], columns=['Frames Processed', 'Detected Class'])
+             my_table = st.table(df1)
+
+         view = st.empty()
+
+         st.markdown("<hr/>", unsafe_allow_html=True)
+         stframes = st.empty()
+
+         vid_file = hf_hub_download(
+             repo_id="sunilsarolkar/isl-test-data",
+             filename=f'test/{category}/{cls}/{filename}',
+             repo_type="dataset"
+         )
+
+         vid = cv2.VideoCapture(vid_file)
+
+         ffprobe_result = ffprobe(vid_file)
+         info = json.loads(ffprobe_result.json)
+         videoinfo = [i for i in info["streams"] if i["codec_type"] == "video"][0]
+         input_fps = videoinfo["avg_frame_rate"]
+         input_pix_fmt = videoinfo["pix_fmt"]
+         input_vcodec = videoinfo["codec_name"]
+         postfix = info["format"]["format_name"].split(",")[0]
+
+         width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
+         height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
+         fps_input = int(vid.get(cv2.CAP_PROP_FPS))
+
+         fps = 0
+         totalFrames = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
+         window = []
+         prevTime = 0
+
+         with tempfile.NamedTemporaryFile(suffix=f'.{postfix}', delete=False) as tfflie:
+             output_file = tfflie.name
+
+             writer = None
+             weighted_avg_dict = {}
+
+             idx = 0
+
+             for _, row in current_test_df.iterrows():
+                 if vid.isOpened():
+                     ret, frame = vid.read()
+
+                     if len(window) < window_size:
+                         # Not enough frames for a prediction yet: draw the pose only.
+                         # Note: eval() trusts the precomputed pose columns in the CSV.
+                         canvas = util.drawStickmodel(frame, eval(row['bodypose_circles']), eval(row['bodypose_sticks']), eval(row['handpose_edges']), eval(row['handpose_peaks']))
+                         canvas_with_plot = util.draw_bar_plot_below_image(canvas, {}, f'Prediction bar plot - Frame number {idx+1} [** no predictions]', canvas)
+                         canvas_with_plot = util.draw_bar_plot_below_image(canvas_with_plot, weighted_avg_dict, f'Weighted avg - Frame number {idx+1} [** no predictions]', canvas)
+                         canvas_with_plot = util.add_padding_to_bottom(canvas_with_plot, (255, 255, 255), 100)  # adds padding at the bottom
+
+                         if writer is None:
+                             input_framesize = canvas_with_plot.shape[:2]
+                             writer = Writer(output_file, input_fps, input_framesize, input_pix_fmt,
+                                             input_vcodec)
+
+                         writer(canvas_with_plot)
+                         with runtime_progress.container():
+                             df1 = pd.DataFrame([[f'{idx+1}/{current_test_df.shape[0]}', '<model will output after 20 frames>']], columns=['Frames Processed', 'Detected Class'])
+                             my_table = st.table(df1)
+                         window.append(frame)
+                         with view.container():
+                             st.image(canvas_with_plot, channels='BGR', use_column_width=True)
+                     else:
+                         # Slide the 20-frame window forward by one frame
+                         window[:-1] = window[1:]
+                         window[-1] = frame
+                         translation_model = get_translator_model()
+
+                         encoded_translation = translation_model(X_test_filtered[idx-20].reshape(1, X_test_filtered[idx-20].shape[0], X_test_filtered[idx-20].shape[1]))
+                         encoded_translation = encoded_translation[0].cpu().detach().numpy()
+                         sorted_index = np.argsort(encoded_translation)[::-1]
+                         maxindex = np.argmax(encoded_translation)
+
+                         top_3_probs = encoded_translation.argsort()[-3:][::-1]  # indices of the top 3 probabilities (descending)
+                         top_3_categories = [expression_mapping[i] for i in top_3_probs]  # convert indices to category names
+                         top_3_values = encoded_translation[top_3_probs]  # corresponding probabilities
+                         for category_name, prob in zip(top_3_categories, top_3_values):
+                             if category_name not in frame_wise_outputs:
+                                 frame_wise_outputs[category_name] = []
+                             frame_wise_outputs[category_name].append(prob)
+
+                         current_prob = {}
+                         for category_name, prob in zip(top_3_categories, top_3_values):
+                             current_prob[category_name] = prob
+
+                         for key in frame_wise_outputs:
+                             # equal weights per class, so this is effectively a running mean
+                             weighted_avg_dict[key] = weighted_average(frame_wise_outputs[key], [len(frame_wise_outputs[key]) for i in range(len(frame_wise_outputs[key]))])
+
+                         sorted_dict = dict(sorted(weighted_avg_dict.items(), key=lambda item: item[1], reverse=True))
+                         canvas = util.drawStickmodel(frame, eval(row['bodypose_circles']), eval(row['bodypose_sticks']), eval(row['handpose_edges']), eval(row['handpose_peaks']))
+                         canvas_with_plot = util.draw_bar_plot_below_image(canvas, current_prob, f'Prediction at frame window({idx-20+1}-{idx+1})', canvas)
+                         canvas_with_plot = util.draw_bar_plot_below_image(canvas_with_plot, weighted_avg_dict, f'Weighted avg till window {idx+1}', canvas)
+                         canvas_with_plot = util.add_padding_to_bottom(canvas_with_plot, (255, 255, 255), 100)
+                         writer(canvas_with_plot)
+
+                         currTime = time.time()
+                         fps = 1 / (currTime - prevTime)
+                         prevTime = currTime
+
+                         # Dashboard: report the class with the highest running average
+                         max_prob = float('-inf')
+                         max_key = None
+                         for exp, prob in weighted_avg_dict.items():
+                             if prob > max_prob:
+                                 max_prob = prob
+                                 max_key = exp
+                         with runtime_progress.container():
+                             df1 = pd.DataFrame([[f'{idx+1}/{current_test_df.shape[0]}', f'{max_key} ({max_prob*100:.2f}%)']], columns=['Frames Processed', 'Detected Class'])
+                             my_table = st.table(df1)
+
+                         with view.container():
+                             st.image(canvas_with_plot, channels='BGR', use_column_width=True)
+
+                     idx = idx + 1
+
+             with view.container():
+                 writer.close()
+                 output_video = open(output_file, 'rb')
+                 out_bytes = output_video.read()
+                 st.video(out_bytes)
+
+             print(f'Output file - {output_file}')
+             cv2.destroyAllWindows()
+             vid.release()
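For reference, a small self-contained sketch of the aggregation the loop above performs: per 20-frame window it keeps the top-3 class probabilities, and `weighted_average` with equal weights reduces to a running mean per class. The class names and sample numbers below are illustrative only.

```python
from collections import defaultdict

def weighted_average(nums, weights):
    if sum(weights) == 0:
        return 0
    return sum(x * y for x, y in zip(nums, weights)) / sum(weights)

frame_wise_outputs = defaultdict(list)  # class name -> probabilities seen so far

def update(top3):
    """top3: [(class_name, probability), ...] for one 20-frame window."""
    for name, prob in top3:
        frame_wise_outputs[name].append(prob)
    # equal weights per class, so this is effectively a running mean
    return {name: weighted_average(probs, [len(probs)] * len(probs))
            for name, probs in frame_wise_outputs.items()}

avg = update([("Hello", 0.61), ("I", 0.22), ("you", 0.09)])
avg = update([("Hello", 0.70), ("you", 0.12), ("we", 0.05)])
print(max(avg, key=avg.get))  # 'Hello' -> the "Detected Class" shown in the table
```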
expression_mapping.py ADDED
@@ -0,0 +1,168 @@
1
+ expression_mapping={107: "alive",
2
+ 58: "Nice",
3
+ 8: "Beautiful",
4
+ 115: "dead",
5
+ 120: "famous",
6
+ 122: "female",
7
+ 51: "Mean",
8
+ 21: "Deaf",
9
+ 111: "clean",
10
+ 117: "dirty",
11
+ 123: "flat",
12
+ 110: "cheap",
13
+ 119: "expensive",
14
+ 116: "deep",
15
+ 99: "Ugly",
16
+ 114: "curved",
17
+ 12: "Blind",
18
+ 142: "poor",
19
+ 138: "male",
20
+ 126: "hard",
21
+ 133: "light",
22
+ 137: "low",
23
+ 113: "cool",
24
+ 144: "rich",
25
+ 109: "big large",
26
+ 108: "bad",
27
+ 112: "cold",
28
+ 135: "loose",
29
+ 121: "fast",
30
+ 141: "old",
31
+ 130: "high",
32
+ 118: "dry",
33
+ 145: "sad",
34
+ 131: "hot",
35
+ 125: "happy",
36
+ 129: "heavy",
37
+ 128: "healthy",
38
+ 124: "good",
39
+ 146: "shallow",
40
+ 153: "strong",
41
+ 161: "weak",
42
+ 157: "thin",
43
+ 158: "tight",
44
+ 136: "loud",
45
+ 139: "narrow",
46
+ 134: "long",
47
+ 156: "thick",
48
+ 148: "short",
49
+ 152: "soft",
50
+ 150: "slow",
51
+ 151: "small little",
52
+ 149: "sick",
53
+ 154: "tall",
54
+ 140: "new",
55
+ 143: "quiet",
56
+ 95: "Today",
57
+ 163: "wide",
58
+ 159: "warm",
59
+ 96: "Tomorrow",
60
+ 162: "wet",
61
+ 1: "Afternoon",
62
+ 27: "Evening",
63
+ 56: "Morning",
64
+ 59: "Night",
65
+ 166: "young",
66
+ 53: "Minute",
67
+ 38: "Hour",
68
+ 88: "Sunday",
69
+ 55: "Month",
70
+ 94: "Time",
71
+ 70: "Pleased",
72
+ 63: "Paper",
73
+ 105: "Year",
74
+ 80: "Second",
75
+ 32: "Gift",
76
+ 102: "Week",
77
+ 43: "Key",
78
+ 48: "Lock",
79
+ 4: "Bag",
80
+ 106: "Yesterday",
81
+ 7: "Bathroom",
82
+ 15: "Card",
83
+ 66: "Pen",
84
+ 45: "Letter",
85
+ 9: "Bed",
86
+ 2: "Alright",
87
+ 67: "Pencil",
88
+ 24: "Dream",
89
+ 13: "Book",
90
+ 44: "Kitchen",
91
+ 92: "Telephone",
92
+ 23: "Door",
93
+ 36: "Hello",
94
+ 61: "Page",
95
+ 40: "How are you",
96
+ 16: "Chair",
97
+ 89: "Table",
98
+ 97: "Tool",
99
+ 68: "Photograph",
100
+ 10: "Bedroom",
101
+ 103: "Window",
102
+ 62: "Paint",
103
+ 14: "Box",
104
+ 76: "Ring",
105
+ 82: "Soap",
106
+ 20: "Crowd",
107
+ 75: "Restaurant",
108
+ 98: "Train Station",
109
+ 31: "Friend",
110
+ 17: "Child",
111
+ 0: "Adult",
112
+ 46: "Library",
113
+ 39: "House",
114
+ 42: "India",
115
+ 86: "Street or Road",
116
+ 72: "Queen",
117
+ 85: "Store or Shop",
118
+ 64: "Park",
119
+ 77: "School",
120
+ 18: "City",
121
+ 49: "Market",
122
+ 60: "Office",
123
+ 132: "it",
124
+ 41: "I",
125
+ 6: "Bank",
126
+ 69: "Player",
127
+ 147: "she",
128
+ 19: "Court",
129
+ 155: "they",
130
+ 104: "Winter",
131
+ 93: "Temple",
132
+ 33: "God",
133
+ 50: "Marriage",
134
+ 29: "Exercise",
135
+ 37: "Hospital",
136
+ 34: "Ground",
137
+ 25: "Election",
138
+ 73: "Race (ethnicity)",
139
+ 11: "Bill",
140
+ 87: "Summer",
141
+ 160: "we",
142
+ 127: "he",
143
+ 22: "Death",
144
+ 84: "Spring",
145
+ 47: "Location",
146
+ 26: "Energy",
147
+ 54: "Money",
148
+ 28: "Ex. Monsoon",
149
+ 165: "you (plural)",
150
+ 65: "Peace",
151
+ 5: "Ball",
152
+ 71: "Price",
153
+ 35: "Gun",
154
+ 30: "Fall",
155
+ 164: "you",
156
+ 81: "Sign",
157
+ 100: "University",
158
+ 83: "Sport",
159
+ 74: "Religion",
160
+ 101: "War",
161
+ 57: "Newspaper",
162
+ 3: "Attack",
163
+ 90: "Team",
164
+ 78: "Science",
165
+ 79: "Season",
166
+ 52: "Medicine",
167
+ 91: "Technology",
168
+ }
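# Usage sketch (illustrative, not part of the original file):
#     expression_mapping[36]            -> "Hello"
#     expression_mapping.get(200, '?')  -> '?' for indices outside the mapping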
model.py ADDED
@@ -0,0 +1,407 @@
import torch
import torch.nn as nn
from collections import OrderedDict


def make_layers(block, no_relu_layers, prelu_layers=()):
    layers = []
    for layer_name, v in block.items():
        if 'pool' in layer_name:
            # v = [kernel_size, stride, padding]
            layer = nn.MaxPool2d(kernel_size=v[0], stride=v[1],
                                 padding=v[2])
            layers.append((layer_name, layer))
        else:
            # v = [in_channels, out_channels, kernel_size, stride, padding]
            conv2d = nn.Conv2d(in_channels=v[0], out_channels=v[1],
                               kernel_size=v[2], stride=v[3],
                               padding=v[4])
            layers.append((layer_name, conv2d))
            if layer_name not in no_relu_layers:
                if layer_name not in prelu_layers:
                    layers.append(('relu_' + layer_name, nn.ReLU(inplace=True)))
                else:
                    layers.append(('prelu' + layer_name[4:], nn.PReLU(v[1])))

    return nn.Sequential(OrderedDict(layers))


def make_layers_Mconv(block, no_relu_layers):
    modules = []
    for layer_name, v in block.items():
        layers = []
        if 'pool' in layer_name:
            layer = nn.MaxPool2d(kernel_size=v[0], stride=v[1],
                                 padding=v[2])
            layers.append((layer_name, layer))
        else:
            conv2d = nn.Conv2d(in_channels=v[0], out_channels=v[1],
                               kernel_size=v[2], stride=v[3],
                               padding=v[4])
            layers.append((layer_name, conv2d))
            if layer_name not in no_relu_layers:
                layers.append(('Mprelu' + layer_name[5:], nn.PReLU(v[1])))
        # each entry becomes its own Sequential so the forward pass can
        # concatenate the intermediate outputs (dense Mconv blocks)
        modules.append(nn.Sequential(OrderedDict(layers)))
    return nn.ModuleList(modules)


class bodypose_25_model(nn.Module):
    def __init__(self):
        super(bodypose_25_model, self).__init__()
        # these layers have no relu layer
        no_relu_layers = ['Mconv7_stage0_L1', 'Mconv7_stage0_L2',
                          'Mconv7_stage1_L1', 'Mconv7_stage1_L2',
                          'Mconv7_stage2_L2', 'Mconv7_stage3_L2']
        prelu_layers = ['conv4_2', 'conv4_3_CPM', 'conv4_4_CPM']
        blocks = {}
        block0 = OrderedDict([
            ('conv1_1', [3, 64, 3, 1, 1]),
            ('conv1_2', [64, 64, 3, 1, 1]),
            ('pool1_stage1', [2, 2, 0]),
            ('conv2_1', [64, 128, 3, 1, 1]),
            ('conv2_2', [128, 128, 3, 1, 1]),
            ('pool2_stage1', [2, 2, 0]),
            ('conv3_1', [128, 256, 3, 1, 1]),
            ('conv3_2', [256, 256, 3, 1, 1]),
            ('conv3_3', [256, 256, 3, 1, 1]),
            ('conv3_4', [256, 256, 3, 1, 1]),
            ('pool3_stage1', [2, 2, 0]),
            ('conv4_1', [256, 512, 3, 1, 1]),
            ('conv4_2', [512, 512, 3, 1, 1]),
            ('conv4_3_CPM', [512, 256, 3, 1, 1]),
            ('conv4_4_CPM', [256, 128, 3, 1, 1])
        ])
        self.model0 = make_layers(block0, no_relu_layers, prelu_layers)

        # L2
        # stage 0
        blocks['Mconv1_stage0_L2'] = OrderedDict([
            ('Mconv1_stage0_L2_0', [128, 96, 3, 1, 1]),
            ('Mconv1_stage0_L2_1', [96, 96, 3, 1, 1]),
            ('Mconv1_stage0_L2_2', [96, 96, 3, 1, 1])
        ])
        for i in range(2, 6):
            blocks['Mconv%d_stage0_L2' % i] = OrderedDict([
                ('Mconv%d_stage0_L2_0' % i, [288, 96, 3, 1, 1]),
                ('Mconv%d_stage0_L2_1' % i, [96, 96, 3, 1, 1]),
                ('Mconv%d_stage0_L2_2' % i, [96, 96, 3, 1, 1])
            ])
        blocks['Mconv6_7_stage0_L2'] = OrderedDict([
            ('Mconv6_stage0_L2', [288, 256, 1, 1, 0]),
            ('Mconv7_stage0_L2', [256, 52, 1, 1, 0])
        ])
        # stages 1-3
        for s in range(1, 4):
            blocks['Mconv1_stage%d_L2' % s] = OrderedDict([
                ('Mconv1_stage%d_L2_0' % s, [180, 128, 3, 1, 1]),
                ('Mconv1_stage%d_L2_1' % s, [128, 128, 3, 1, 1]),
                ('Mconv1_stage%d_L2_2' % s, [128, 128, 3, 1, 1])
            ])
            for i in range(2, 6):
                blocks['Mconv%d_stage%d_L2' % (i, s)] = OrderedDict([
                    ('Mconv%d_stage%d_L2_0' % (i, s), [384, 128, 3, 1, 1]),
                    ('Mconv%d_stage%d_L2_1' % (i, s), [128, 128, 3, 1, 1]),
                    ('Mconv%d_stage%d_L2_2' % (i, s), [128, 128, 3, 1, 1])
                ])
            blocks['Mconv6_7_stage%d_L2' % s] = OrderedDict([
                ('Mconv6_stage%d_L2' % s, [384, 512, 1, 1, 0]),
                ('Mconv7_stage%d_L2' % s, [512, 52, 1, 1, 0])
            ])

        # L1
        # stage 0
        blocks['Mconv1_stage0_L1'] = OrderedDict([
            ('Mconv1_stage0_L1_0', [180, 96, 3, 1, 1]),
            ('Mconv1_stage0_L1_1', [96, 96, 3, 1, 1]),
            ('Mconv1_stage0_L1_2', [96, 96, 3, 1, 1])
        ])
        for i in range(2, 6):
            blocks['Mconv%d_stage0_L1' % i] = OrderedDict([
                ('Mconv%d_stage0_L1_0' % i, [288, 96, 3, 1, 1]),
                ('Mconv%d_stage0_L1_1' % i, [96, 96, 3, 1, 1]),
                ('Mconv%d_stage0_L1_2' % i, [96, 96, 3, 1, 1])
            ])
        blocks['Mconv6_7_stage0_L1'] = OrderedDict([
            ('Mconv6_stage0_L1', [288, 256, 1, 1, 0]),
            ('Mconv7_stage0_L1', [256, 26, 1, 1, 0])
        ])
        # stage 1
        blocks['Mconv1_stage1_L1'] = OrderedDict([
            ('Mconv1_stage1_L1_0', [206, 128, 3, 1, 1]),
            ('Mconv1_stage1_L1_1', [128, 128, 3, 1, 1]),
            ('Mconv1_stage1_L1_2', [128, 128, 3, 1, 1])
        ])
        for i in range(2, 6):
            blocks['Mconv%d_stage1_L1' % i] = OrderedDict([
                ('Mconv%d_stage1_L1_0' % i, [384, 128, 3, 1, 1]),
                ('Mconv%d_stage1_L1_1' % i, [128, 128, 3, 1, 1]),
                ('Mconv%d_stage1_L1_2' % i, [128, 128, 3, 1, 1])
            ])
        blocks['Mconv6_7_stage1_L1'] = OrderedDict([
            ('Mconv6_stage1_L1', [384, 512, 1, 1, 0]),
            ('Mconv7_stage1_L1', [512, 26, 1, 1, 0])
        ])

        for k in blocks.keys():
            blocks[k] = make_layers_Mconv(blocks[k], no_relu_layers)
        self.models = nn.ModuleDict(blocks)
        for param in self.parameters():
            param.requires_grad = False

    def _Mconv_forward(self, x, models):
        outs = []
        out = x
        for m in models:
            out = m(out)
            outs.append(out)
        return torch.cat(outs, 1)

    def forward(self, x):
        out0 = self.model0(x)
        # L2
        tout = out0
        for s in range(4):
            tout = self._Mconv_forward(tout, self.models['Mconv1_stage%d_L2' % s])
            for v in range(2, 6):
                tout = self._Mconv_forward(tout, self.models['Mconv%d_stage%d_L2' % (v, s)])
            tout = self.models['Mconv6_7_stage%d_L2' % s][0](tout)
            tout = self.models['Mconv6_7_stage%d_L2' % s][1](tout)
            outL2 = tout
            tout = torch.cat([out0, tout], 1)
        # L1 stage 0
        tout = self._Mconv_forward(tout, self.models['Mconv1_stage0_L1'])
        for v in range(2, 6):
            tout = self._Mconv_forward(tout, self.models['Mconv%d_stage0_L1' % v])
        tout = self.models['Mconv6_7_stage0_L1'][0](tout)
        tout = self.models['Mconv6_7_stage0_L1'][1](tout)
        outS0L1 = tout
        tout = torch.cat([out0, outS0L1, outL2], 1)
        # L1 stage 1
        tout = self._Mconv_forward(tout, self.models['Mconv1_stage1_L1'])
        for v in range(2, 6):
            tout = self._Mconv_forward(tout, self.models['Mconv%d_stage1_L1' % v])
        tout = self.models['Mconv6_7_stage1_L1'][0](tout)
        outS1L1 = self.models['Mconv6_7_stage1_L1'][1](tout)

        return outL2, outS1L1


class bodypose_model(nn.Module):
    def __init__(self):
        super(bodypose_model, self).__init__()

        # these layers have no relu layer
        no_relu_layers = ['conv5_5_CPM_L1', 'conv5_5_CPM_L2', 'Mconv7_stage2_L1',
                          'Mconv7_stage2_L2', 'Mconv7_stage3_L1', 'Mconv7_stage3_L2',
                          'Mconv7_stage4_L1', 'Mconv7_stage4_L2', 'Mconv7_stage5_L1',
                          'Mconv7_stage5_L2', 'Mconv7_stage6_L1', 'Mconv7_stage6_L2']
        blocks = {}
        block0 = OrderedDict([
            ('conv1_1', [3, 64, 3, 1, 1]),
            ('conv1_2', [64, 64, 3, 1, 1]),
            ('pool1_stage1', [2, 2, 0]),
            ('conv2_1', [64, 128, 3, 1, 1]),
            ('conv2_2', [128, 128, 3, 1, 1]),
            ('pool2_stage1', [2, 2, 0]),
            ('conv3_1', [128, 256, 3, 1, 1]),
            ('conv3_2', [256, 256, 3, 1, 1]),
            ('conv3_3', [256, 256, 3, 1, 1]),
            ('conv3_4', [256, 256, 3, 1, 1]),
            ('pool3_stage1', [2, 2, 0]),
            ('conv4_1', [256, 512, 3, 1, 1]),
            ('conv4_2', [512, 512, 3, 1, 1]),
            ('conv4_3_CPM', [512, 256, 3, 1, 1]),
            ('conv4_4_CPM', [256, 128, 3, 1, 1])
        ])

        # Stage 1
        block1_1 = OrderedDict([
            ('conv5_1_CPM_L1', [128, 128, 3, 1, 1]),
            ('conv5_2_CPM_L1', [128, 128, 3, 1, 1]),
            ('conv5_3_CPM_L1', [128, 128, 3, 1, 1]),
            ('conv5_4_CPM_L1', [128, 512, 1, 1, 0]),
            ('conv5_5_CPM_L1', [512, 38, 1, 1, 0])
        ])

        block1_2 = OrderedDict([
            ('conv5_1_CPM_L2', [128, 128, 3, 1, 1]),
            ('conv5_2_CPM_L2', [128, 128, 3, 1, 1]),
            ('conv5_3_CPM_L2', [128, 128, 3, 1, 1]),
            ('conv5_4_CPM_L2', [128, 512, 1, 1, 0]),
            ('conv5_5_CPM_L2', [512, 19, 1, 1, 0])
        ])
        blocks['block1_1'] = block1_1
        blocks['block1_2'] = block1_2

        self.model0 = make_layers(block0, no_relu_layers)

        # Stages 2 - 6
        for i in range(2, 7):
            blocks['block%d_1' % i] = OrderedDict([
                ('Mconv1_stage%d_L1' % i, [185, 128, 7, 1, 3]),
                ('Mconv2_stage%d_L1' % i, [128, 128, 7, 1, 3]),
                ('Mconv3_stage%d_L1' % i, [128, 128, 7, 1, 3]),
                ('Mconv4_stage%d_L1' % i, [128, 128, 7, 1, 3]),
                ('Mconv5_stage%d_L1' % i, [128, 128, 7, 1, 3]),
                ('Mconv6_stage%d_L1' % i, [128, 128, 1, 1, 0]),
                ('Mconv7_stage%d_L1' % i, [128, 38, 1, 1, 0])
            ])

            blocks['block%d_2' % i] = OrderedDict([
                ('Mconv1_stage%d_L2' % i, [185, 128, 7, 1, 3]),
                ('Mconv2_stage%d_L2' % i, [128, 128, 7, 1, 3]),
                ('Mconv3_stage%d_L2' % i, [128, 128, 7, 1, 3]),
                ('Mconv4_stage%d_L2' % i, [128, 128, 7, 1, 3]),
                ('Mconv5_stage%d_L2' % i, [128, 128, 7, 1, 3]),
                ('Mconv6_stage%d_L2' % i, [128, 128, 1, 1, 0]),
                ('Mconv7_stage%d_L2' % i, [128, 19, 1, 1, 0])
            ])

        for k in blocks.keys():
            blocks[k] = make_layers(blocks[k], no_relu_layers)

        self.model1_1 = blocks['block1_1']
        self.model2_1 = blocks['block2_1']
        self.model3_1 = blocks['block3_1']
        self.model4_1 = blocks['block4_1']
        self.model5_1 = blocks['block5_1']
        self.model6_1 = blocks['block6_1']

        self.model1_2 = blocks['block1_2']
        self.model2_2 = blocks['block2_2']
        self.model3_2 = blocks['block3_2']
        self.model4_2 = blocks['block4_2']
        self.model5_2 = blocks['block5_2']
        self.model6_2 = blocks['block6_2']
        for param in self.parameters():
            param.requires_grad = False

    def forward(self, x):

        out1 = self.model0(x)

        out1_1 = self.model1_1(out1)
        out1_2 = self.model1_2(out1)
        out2 = torch.cat([out1_1, out1_2, out1], 1)

        out2_1 = self.model2_1(out2)
        out2_2 = self.model2_2(out2)
        out3 = torch.cat([out2_1, out2_2, out1], 1)

        out3_1 = self.model3_1(out3)
        out3_2 = self.model3_2(out3)
        out4 = torch.cat([out3_1, out3_2, out1], 1)

        out4_1 = self.model4_1(out4)
        out4_2 = self.model4_2(out4)
        out5 = torch.cat([out4_1, out4_2, out1], 1)

        out5_1 = self.model5_1(out5)
        out5_2 = self.model5_2(out5)
        out6 = torch.cat([out5_1, out5_2, out1], 1)

        out6_1 = self.model6_1(out6)
        out6_2 = self.model6_2(out6)

        return out6_1, out6_2


class handpose_model(nn.Module):
    def __init__(self):
        super(handpose_model, self).__init__()

        # these layers have no relu layer
        no_relu_layers = ['conv6_2_CPM', 'Mconv7_stage2', 'Mconv7_stage3',
                          'Mconv7_stage4', 'Mconv7_stage5', 'Mconv7_stage6']
        # stage 1
        block1_0 = OrderedDict([
            ('conv1_1', [3, 64, 3, 1, 1]),
            ('conv1_2', [64, 64, 3, 1, 1]),
            ('pool1_stage1', [2, 2, 0]),
            ('conv2_1', [64, 128, 3, 1, 1]),
            ('conv2_2', [128, 128, 3, 1, 1]),
            ('pool2_stage1', [2, 2, 0]),
            ('conv3_1', [128, 256, 3, 1, 1]),
            ('conv3_2', [256, 256, 3, 1, 1]),
            ('conv3_3', [256, 256, 3, 1, 1]),
            ('conv3_4', [256, 256, 3, 1, 1]),
            ('pool3_stage1', [2, 2, 0]),
            ('conv4_1', [256, 512, 3, 1, 1]),
            ('conv4_2', [512, 512, 3, 1, 1]),
            ('conv4_3', [512, 512, 3, 1, 1]),
            ('conv4_4', [512, 512, 3, 1, 1]),
            ('conv5_1', [512, 512, 3, 1, 1]),
            ('conv5_2', [512, 512, 3, 1, 1]),
            ('conv5_3_CPM', [512, 128, 3, 1, 1])
        ])

        block1_1 = OrderedDict([
            ('conv6_1_CPM', [128, 512, 1, 1, 0]),
            ('conv6_2_CPM', [512, 22, 1, 1, 0])
        ])

        blocks = {}
        blocks['block1_0'] = block1_0
        blocks['block1_1'] = block1_1

        # stages 2 - 6
        for i in range(2, 7):
            blocks['block%d' % i] = OrderedDict([
                ('Mconv1_stage%d' % i, [150, 128, 7, 1, 3]),
                ('Mconv2_stage%d' % i, [128, 128, 7, 1, 3]),
                ('Mconv3_stage%d' % i, [128, 128, 7, 1, 3]),
                ('Mconv4_stage%d' % i, [128, 128, 7, 1, 3]),
                ('Mconv5_stage%d' % i, [128, 128, 7, 1, 3]),
                ('Mconv6_stage%d' % i, [128, 128, 1, 1, 0]),
                ('Mconv7_stage%d' % i, [128, 22, 1, 1, 0])
            ])

        for k in blocks.keys():
            blocks[k] = make_layers(blocks[k], no_relu_layers)

        self.model1_0 = blocks['block1_0']
        self.model1_1 = blocks['block1_1']
        self.model2 = blocks['block2']
        self.model3 = blocks['block3']
        self.model4 = blocks['block4']
        self.model5 = blocks['block5']
        self.model6 = blocks['block6']
        for param in self.parameters():
            param.requires_grad = False

    def forward(self, x):
        out1_0 = self.model1_0(x)
        out1_1 = self.model1_1(out1_0)
        concat_stage2 = torch.cat([out1_1, out1_0], 1)
        out_stage2 = self.model2(concat_stage2)
        concat_stage3 = torch.cat([out_stage2, out1_0], 1)
        out_stage3 = self.model3(concat_stage3)
        concat_stage4 = torch.cat([out_stage3, out1_0], 1)
        out_stage4 = self.model4(concat_stage4)
        concat_stage5 = torch.cat([out_stage4, out1_0], 1)
        out_stage5 = self.model5(concat_stage5)
        concat_stage6 = torch.cat([out_stage5, out1_0], 1)
        out_stage6 = self.model6(concat_stage6)
        return out_stage6
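# A minimal shape-check sketch (not part of the original file): even without
# pretrained weights loaded, bodypose_25_model maps a (1, 3, 368, 368) image
# tensor to a 52-channel PAF map and a 26-channel keypoint heatmap at stride 8,
# i.e. 46 x 46 feature maps.
if __name__ == '__main__':
    model = bodypose_25_model()
    with torch.no_grad():
        pafs, heatmaps = model(torch.zeros(1, 3, 368, 368))
    print(pafs.shape)      # torch.Size([1, 52, 46, 46])
    print(heatmaps.shape)  # torch.Size([1, 26, 46, 46])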
packages.txt ADDED
@@ -0,0 +1,6 @@
ffmpeg
libgl1
libglib2.0-0
libsm6
libxrender1
libxext6
requirements.txt ADDED
@@ -0,0 +1,22 @@
opencv_python_headless
streamlit
numpy
Pillow
matplotlib
scipy
scikit-image
tqdm
pandas
torch
torchaudio
torchvision
torchtext
torchdata
av
keras
ffmpeg
ffmpeg-python
seaborn[stats]
huggingface_hub
util.py ADDED
@@ -0,0 +1,463 @@
import copy
import math

import cv2
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
from matplotlib.figure import Figure


def padRightDownCorner(img, stride, padValue):
    h = img.shape[0]
    w = img.shape[1]

    pad = 4 * [None]
    pad[0] = 0  # up
    pad[1] = 0  # left
    pad[2] = 0 if (h % stride == 0) else stride - (h % stride)  # down
    pad[3] = 0 if (w % stride == 0) else stride - (w % stride)  # right

    img_padded = img
    pad_up = np.tile(img_padded[0:1, :, :] * 0 + padValue, (pad[0], 1, 1))
    img_padded = np.concatenate((pad_up, img_padded), axis=0)
    pad_left = np.tile(img_padded[:, 0:1, :] * 0 + padValue, (1, pad[1], 1))
    img_padded = np.concatenate((pad_left, img_padded), axis=1)
    pad_down = np.tile(img_padded[-2:-1, :, :] * 0 + padValue, (pad[2], 1, 1))
    img_padded = np.concatenate((img_padded, pad_down), axis=0)
    pad_right = np.tile(img_padded[:, -2:-1, :] * 0 + padValue, (1, pad[3], 1))
    img_padded = np.concatenate((img_padded, pad_right), axis=1)

    return img_padded, pad


# transfer a caffe model to pytorch, matching weights by layer name
def transfer(model, model_weights):
    transferred_model_weights = {}
    for weights_name in model.state_dict().keys():
        if len(weights_name.split('.')) > 4:  # body25
            transferred_model_weights[weights_name] = model_weights['.'.join(
                weights_name.split('.')[3:])]
        else:
            transferred_model_weights[weights_name] = model_weights['.'.join(
                weights_name.split('.')[1:])]
    return transferred_model_weights


# draw the body keypoints and limbs
def draw_bodypose(canvas, candidate, subset, model_type='body25'):
    stickwidth = 4
    if model_type == 'body25':
        limbSeq = [[1, 0], [1, 2], [2, 3], [3, 4], [1, 5], [5, 6], [6, 7], [1, 8], [8, 9], [9, 10],
                   [10, 11], [8, 12], [12, 13], [13, 14], [0, 15], [0, 16], [15, 17], [16, 18],
                   [11, 24], [11, 22], [14, 21], [14, 19], [22, 23], [19, 20]]
        njoint = 25
    else:
        limbSeq = [[1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7], [1, 8], [8, 9],
                   [9, 10], [1, 11], [11, 12], [12, 13], [1, 0], [0, 14], [14, 16],
                   [0, 15], [15, 17], [2, 16], [5, 17]]
        njoint = 18

    colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0],
              [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255],
              [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85], [255, 255, 0], [255, 255, 85], [255, 255, 170],
              [255, 255, 255], [170, 255, 255], [85, 255, 255], [0, 255, 255]]

    for i in range(njoint):
        for n in range(len(subset)):
            index = int(subset[n][i])
            if index == -1:
                continue
            x, y = candidate[index][0:2]
            cv2.circle(canvas, (int(x), int(y)), 4, colors[i], thickness=-1)
    for i in range(njoint - 1):
        for n in range(len(subset)):
            index = subset[n][np.array(limbSeq[i])]
            if -1 in index:
                continue
            cur_canvas = canvas.copy()
            Y = candidate[index.astype(int), 0]
            X = candidate[index.astype(int), 1]
            mX = np.mean(X)
            mY = np.mean(Y)
            length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
            angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
            polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
            cv2.fillConvexPoly(cur_canvas, polygon, colors[i])
            canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0)
    return canvas


# sample inputs, for reference:
# subsets [[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, -1.0, 11.0, 12.0, -1.0, 13.0, 14.0, 15.0, 16.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 26.650803712300775, 17.0]]
# candidates [[983.0, 172.0, 0.8991263508796692, 0.0], [980.0, 352.0, 0.930037796497345, 1.0], [848.0, 342.0, 0.8652207255363464, 2.0], [811.0, 598.0, 0.8107873797416687, 3.0], [806.0, 817.0, 0.7464589476585388, 4.0], [1120.0, 361.0, 0.8538270592689514, 5.0], [1148.0, 601.0, 0.6797391176223755, 6.0], [1149.0, 834.0, 0.5189468264579773, 7.0], [968.0, 757.0, 0.6468111276626587, 8.0], [876.0, 756.0, 0.6387956142425537, 9.0], [854.0, 1072.0, 0.4211728572845459, 10.0], [1057.0, 759.0, 0.6311940550804138, 11.0], [1038.0, 1072.0, 0.38531172275543213, 12.0], [955.0, 146.0, 0.925083339214325, 13.0], [1016.0, 151.0, 0.9023998379707336, 14.0], [909.0, 167.0, 0.9096773862838745, 15.0], [1057.0, 173.0, 0.8605436086654663, 16.0]]
def get_bodypose(candidate, subset, model_type='coco'):
    if model_type == 'body25':
        limbSeq = [[1, 0], [1, 2], [2, 3], [3, 4], [1, 5], [5, 6], [6, 7], [1, 8], [8, 9], [9, 10],
                   [10, 11], [8, 12], [12, 13], [13, 14], [0, 15], [0, 16], [15, 17], [16, 18],
                   [11, 24], [11, 22], [14, 21], [14, 19], [22, 23], [19, 20]]
        njoint = 25
    else:
        limbSeq = [[1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7], [1, 8], [8, 9],
                   [9, 10], [1, 11], [11, 12], [12, 13], [1, 0], [0, 14], [14, 16],
                   [0, 15], [15, 17], [2, 16], [5, 17]]
        njoint = 18

    x_y_circles = []
    for i in range(njoint):
        for n in range(len(subset)):
            index = int(subset[n][i])
            if index == -1:
                continue
            x, y = candidate[index][0:2]  # e.g. 983.0, 172.0
            x_y_circles.append((x, y))

    x_y_sticks = []
    for i in range(njoint - 1):
        for n in range(len(subset)):
            index = subset[n][np.array(limbSeq[i])]
            if -1 in index:
                continue
            Y = candidate[index.astype(int), 0]
            X = candidate[index.astype(int), 1]
            mX = np.mean(X)
            mY = np.mean(Y)
            length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
            angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
            # stored as (mY, mX, ...): the order cv2.ellipse2Poly expects as centre
            x_y_sticks.append((mY, mX, angle, length))
    return (x_y_circles, x_y_sticks)


# sample all_hand_peaks: [[[0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [1100, 858], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0]], [[0, 0], [858, 859], [868, 894], [873, 938], [0, 0], [802, 920], [807, 961], [821, 977], [836, 992], [0, 0], [781, 955], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0]]]
def draw_handpose(canvas, all_hand_peaks, show_number=False):
    edges = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10],
             [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]]
    fig = Figure(figsize=plt.figaspect(canvas))

    fig.subplots_adjust(0, 0, 1, 1)
    fig.subplots_adjust(bottom=0, top=1, left=0, right=1)
    bg = FigureCanvas(fig)
    ax = fig.subplots()
    ax.axis('off')
    ax.imshow(canvas)

    width, height = ax.figure.get_size_inches() * ax.figure.get_dpi()

    for peaks in all_hand_peaks:
        for ie, e in enumerate(edges):
            # draw an edge only when both of its endpoints were detected
            if np.sum(np.all(peaks[e], axis=1) == 0) == 0:
                x1, y1 = peaks[e[0]]
                x2, y2 = peaks[e[1]]
                ax.plot([x1, x2], [y1, y2], color=matplotlib.colors.hsv_to_rgb([ie / float(len(edges)), 1.0, 1.0]))

        for i, keypoint in enumerate(peaks):
            x, y = keypoint
            ax.plot(x, y, 'r.')
            if show_number:
                ax.text(x, y, str(i))
    bg.draw()
    canvas = np.frombuffer(bg.tostring_rgb(), dtype='uint8').reshape(int(height), int(width), 3)
    return canvas


def get_handpose(all_hand_peaks, show_number=False):
    # show_number is kept for signature parity with draw_handpose
    edges = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10],
             [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]]
    export_edges = [[], []]
    export_peaks = [[], []]
    for idx, peaks in enumerate(all_hand_peaks):
        for ie, e in enumerate(edges):
            if np.sum(np.all(peaks[e], axis=1) == 0) == 0:
                x1, y1 = peaks[e[0]]
                x2, y2 = peaks[e[1]]
                export_edges[idx].append((ie, (x1, y1), (x2, y2)))

        for i, keypoint in enumerate(peaks):
            x, y = keypoint
            export_peaks[idx].append((x, y, str(i)))
    return (export_edges, export_peaks)


# an image drawn directly by opencv is lower quality; kept for reference
def draw_handpose_by_opencv(canvas, peaks, show_number=False):
    edges = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10],
             [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]]
    for ie, e in enumerate(edges):
        if np.sum(np.all(peaks[e], axis=1) == 0) == 0:
            x1, y1 = peaks[e[0]]
            x2, y2 = peaks[e[1]]
            cv2.line(canvas, (x1, y1), (x2, y2), matplotlib.colors.hsv_to_rgb([ie / float(len(edges)), 1.0, 1.0]) * 255, thickness=2)

    for i, keypoint in enumerate(peaks):
        x, y = keypoint
        cv2.circle(canvas, (x, y), 4, (0, 0, 255), thickness=-1)
        if show_number:
            cv2.putText(canvas, str(i), (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 0), lineType=cv2.LINE_AA)
    return canvas


# detect hands from body pose keypoints
# please refer to https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/src/openpose/hand/handDetector.cpp
def handDetect(candidate, subset, oriImg):
    # right hand: wrist 4, elbow 3, shoulder 2
    # left hand: wrist 7, elbow 6, shoulder 5
    ratioWristElbow = 0.33
    detect_result = []

    image_height, image_width = oriImg.shape[0:2]
    for person in subset.astype(int):
        # skip a side if any of its wrist/elbow/shoulder was not detected
        has_left = np.sum(person[[5, 6, 7]] == -1) == 0
        has_right = np.sum(person[[2, 3, 4]] == -1) == 0
        if not (has_left or has_right):
            continue
        hands = []
        # left hand
        if has_left:
            left_shoulder_index, left_elbow_index, left_wrist_index = person[[5, 6, 7]]
            x1, y1 = candidate[left_shoulder_index][:2]
            x2, y2 = candidate[left_elbow_index][:2]
            x3, y3 = candidate[left_wrist_index][:2]
            hands.append([x1, y1, x2, y2, x3, y3, True])
        # right hand
        if has_right:
            right_shoulder_index, right_elbow_index, right_wrist_index = person[[2, 3, 4]]
            x1, y1 = candidate[right_shoulder_index][:2]
            x2, y2 = candidate[right_elbow_index][:2]
            x3, y3 = candidate[right_wrist_index][:2]
            hands.append([x1, y1, x2, y2, x3, y3, False])

        for x1, y1, x2, y2, x3, y3, is_left in hands:
            # pos_hand = pos_wrist + ratio * (pos_wrist - pos_elbow)
            # from the OpenPose C++ handDetector:
            # handRectangle.x = posePtr[wrist*3] + ratioWristElbow * (posePtr[wrist*3] - posePtr[elbow*3]);
            # handRectangle.y = posePtr[wrist*3+1] + ratioWristElbow * (posePtr[wrist*3+1] - posePtr[elbow*3+1]);
            # handRectangle.width = 1.5f * fastMax(distanceWristElbow, 0.9f * distanceElbowShoulder);
            x = x3 + ratioWristElbow * (x3 - x2)
            y = y3 + ratioWristElbow * (y3 - y2)
            distanceWristElbow = math.sqrt((x3 - x2) ** 2 + (y3 - y2) ** 2)
            distanceElbowShoulder = math.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)
            width = 1.5 * max(distanceWristElbow, 0.9 * distanceElbowShoulder)
            # x, y refer to the centre --> offset to the top-left corner
            x -= width / 2
            y -= width / 2  # width = height
            # clip the box to the image
            if x < 0: x = 0
            if y < 0: y = 0
            width1 = width
            width2 = width
            if x + width > image_width: width1 = image_width - x
            if y + width > image_height: width2 = image_height - y
            width = min(width1, width2)
            # discard hand boxes smaller than 20 pixels
            if width >= 20:
                detect_result.append([int(x), int(y), int(width), is_left])

    '''
    return value: [[x, y, w, True if left hand else False]].
    width = height since the network requires square input.
    x, y is the coordinate of the top-left corner.
    '''
    return detect_result
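# Worked example for the box geometry above (illustrative numbers, not from the
# original source): with elbow (x2, y2) = (100, 100) and wrist (x3, y3) = (130, 100),
# the box centre is pushed past the wrist to x = 130 + 0.33 * (130 - 100) = 139.9;
# with distanceWristElbow = 30 and distanceElbowShoulder = 40, the square box
# side is width = 1.5 * max(30, 0.9 * 40) = 54 pixels.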
def drawStickmodel(oriImg, x_ytupple, x_y_sticks, export_edges, export_peaks):
    canvas = copy.deepcopy(oriImg)

    colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0],
              [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255],
              [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85], [255, 255, 0], [255, 255, 85], [255, 255, 170],
              [255, 255, 255], [170, 255, 255], [85, 255, 255], [0, 255, 255]]
    stickwidth = 4

    # x_y_sticks stores (mY, mX, angle, length) from get_bodypose, so the first
    # two values unpack here in the order cv2.ellipse2Poly expects as the centre
    for idx, (mX, mY, angle, length) in enumerate(x_y_sticks):
        cur_canvas = canvas.copy()
        polygon = cv2.ellipse2Poly((int(mX), int(mY)), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
        cv2.fillConvexPoly(cur_canvas, polygon, colors[idx])
        canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0)

    for idx, (x, y) in enumerate(x_ytupple):
        cv2.circle(canvas, (int(x), int(y)), 4, colors[idx], thickness=-1)

    ## Handpose
    fig = Figure(figsize=plt.figaspect(canvas))
    fig.subplots_adjust(0, 0, 1, 1)
    fig.subplots_adjust(bottom=0, top=1, left=0, right=1)
    bg = FigureCanvas(fig)
    ax = fig.subplots()
    ax.axis('off')
    ax.imshow(canvas)

    edges = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10],
             [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]]

    for both_hand_edges in export_edges:
        for (ie, (x1, y1), (x2, y2)) in both_hand_edges:
            ax.plot([x1, x2], [y1, y2], color=matplotlib.colors.hsv_to_rgb([ie / float(len(edges)), 1.0, 1.0]))

    width, height = ax.figure.get_size_inches() * ax.figure.get_dpi()

    for both_hand_peaks in export_peaks:
        for (x, y, text) in both_hand_peaks:
            ax.plot(x, y, 'r.')

    bg.draw()

    canvas = np.frombuffer(bg.tostring_rgb(), dtype='uint8').reshape(int(height), int(width), 3)

    return cv2.resize(canvas, (math.ceil(width), math.ceil(height)))


def draw_bar_plot_below_image(image, predictions, title, origImg):
    """
    Draws a bar plot of class probabilities below an image.

    Args:
        image (numpy.ndarray): The image to display.
        predictions (dict): Mapping of class label -> predicted probability.
        title (str): Title shown above the bar plot.
        origImg (numpy.ndarray): Reference image used only to size the figure.
    """
    fig, ax = plt.subplots(figsize=(origImg.shape[1] / 100, origImg.shape[0] / 200), dpi=100)
    plt.title(title)
    labels = list(predictions.keys())
    probabilities = list(predictions.values())

    # Create a Seaborn bar plot (default colour palette)
    sns.barplot(x=labels, y=probabilities, ax=ax)
    fig.canvas.draw()
    # Convert the plot to a NumPy array, dropping the alpha channel
    plot_image = np.array(fig.canvas.renderer.buffer_rgba())[:, :, :3]
    plt.close(fig)  # close the figure to avoid leaking memory

    # Stack the image and the plot vertically, matching widths
    combined_image = np.vstack((image, cv2.resize(plot_image, (image.shape[1], plot_image.shape[0]))))

    return combined_image


def add_padding_to_bottom(image, pad_value, pad_height):
    """
    Adds padding of a given colour to the bottom of an image.

    Args:
        image (numpy.ndarray): The input image.
        pad_value (tuple or int): The colour value to fill the padding area.
        pad_height (int): The height of the padding to add at the bottom.

    Returns:
        numpy.ndarray: The image with padding added.
    """
    height, width, channels = image.shape
    padding = np.zeros((pad_height, width, channels), dtype=image.dtype)
    padding[:, :, :] = pad_value
    return np.vstack((image, padding))


def crop_to_drawing(image):
    """
    Crops an image to the tight bounding rectangle of non-zero pixels,
    taken as the union of the per-channel bounding rectangles.

    Args:
        image: A NumPy array of shape (H, W, C).

    Returns:
        A cropped image (NumPy array) containing only the drawing area.
    """
    x_min, y_min = image.shape[1], image.shape[0]
    x_max, y_max = 0, 0
    for channel in range(image.shape[2]):
        x, y, w, h = cv2.boundingRect(image[:, :, channel])
        x_min, y_min = min(x_min, x), min(y_min, y)
        x_max, y_max = max(x_max, x + w), max(y_max, y + h)
    return image[y_min:y_max, x_min:x_max, :]


# get the (row, column) index of the maximum of a 2-D array
def npmax(array):
    arrayindex = array.argmax(1)
    arrayvalue = array.max(1)
    i = arrayvalue.argmax()
    j = arrayindex[i]
    return i, j
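# Example (illustrative, not part of the original file):
#     >>> npmax(np.array([[1, 5], [9, 2]]))
#     (1, 0)   # the global maximum, 9, sits at row 1, column 0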