import base64
import time

import cv2
import numpy as np
from numpy.linalg import norm
from flask import Flask, render_template, request, jsonify

import mediapipe as mp
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2
from mediapipe.tasks.python import vision
from tflite_support.task import vision as vision2
from tflite_support.task import core
from tflite_support.task import processor

# Image annotation utils and shared state
char_list = []
letter_result = 0
old_letter_result = 0
MARGIN = 10  # pixels
FONT_SIZE = 1
FONT_THICKNESS = 1
HANDEDNESS_TEXT_COLOR = (88, 205, 54)  # vibrant green
result_to_show = 0
cresult_to_show = 0
text_x = 0  # top-left corner of the last bounding box (for optional text overlay)
text_y = 0
cwhich = 0
lastwidth = 400
letterscore = 0
frame_time = 0
same_letter_time = 0
no_hand_flag = 1
# UTILS


def brightness(img):
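    """Average perceived brightness of a BGR/RGB or grayscale image
    (helper; currently unused in this file)."""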
    if len(img.shape) == 3:
        # Colored RGB or BGR (*Do Not* use HSV images with this function)
        # create brightness with euclidean norm
        return np.average(norm(img, axis=2)) / np.sqrt(3)
    else:
        # Grayscale
        return np.average(img)


def draw_landmarks_on_image(rgb_image, detection_result):
  hand_landmarks_list = detection_result.hand_landmarks
  handedness_list = detection_result.handedness
  annotated_image = np.copy(rgb_image)
  crop = []
  image_height, image_width, _ = annotated_image.shape


  # Loop through the detected hands to visualize.
  for idx in range(len(hand_landmarks_list)):
    hand_landmarks = hand_landmarks_list[idx]
    handedness = handedness_list[idx]

    # Draw the hand landmarks.
    hand_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
    hand_landmarks_proto.landmark.extend([
      landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z) for landmark in hand_landmarks
    ])
    solutions.drawing_utils.draw_landmarks(
      annotated_image,
      hand_landmarks_proto,
      solutions.hands.HAND_CONNECTIONS,
      solutions.drawing_styles.get_default_hand_landmarks_style(),
      solutions.drawing_styles.get_default_hand_connections_style())

    # Get bounding box from the normalised landmark coordinates
    x_coordinates = [landmark.x for landmark in hand_landmarks]
    y_coordinates = [landmark.y for landmark in hand_landmarks]

    min_x = int(min(x_coordinates) * image_width)   # Left
    min_y = int(min(y_coordinates) * image_height)  # Top
    max_x = int(max(x_coordinates) * image_width)   # Right
    max_y = int(max(y_coordinates) * image_height)  # Bottom

    # Get dimensions of bounding box
    sect_height = max_y - min_y
    sect_width = max_x - min_x

    # Get center of bounding box
    center_x = (min_x + max_x) / 2
    center_y = (min_y + max_y) / 2

    # Square the crop on its dominant axis, then pad it
    sect_diameter = max(sect_height, sect_width)
    sect_diameter = sect_diameter + 50  # Pad diameter
    sect_radius = int(sect_diameter / 2)  # Find radius
    
    # Crop window around the box center
    crop_top = int(center_y - sect_radius)     # Top boundary
    crop_bottom = int(center_y + sect_radius)  # Bottom boundary
    crop_left = int(center_x - sect_radius)    # Left boundary
    crop_right = int(center_x + sect_radius)   # Right boundary

    # Clamp the crop window to the image canvas
    crop_top = max(crop_top, 0)
    crop_left = max(crop_left, 0)
    crop_right = min(crop_right, image_width)
    crop_bottom = min(crop_bottom, image_height)

    # Trace bounding box
    annotated_image = cv2.rectangle(annotated_image, (crop_left, crop_top), (crop_right, crop_bottom), (255,0,0), 6)

    global text_x
    global text_y

    # For text, currently not used
    text_x=crop_left
    text_y=crop_top
    
    # Get cropped image
    crop = annotated_image[crop_top:crop_bottom, crop_left:crop_right] 

    # Scale cropped image
    h, w = crop.shape[0:2]
    neww = 150
    newh = int(neww*(h/w))
    crop = cv2.resize(crop, (neww, newh))
    
    #annotated_image[0:0+crop.shape[0], 0:0+crop.shape[1]] = crop # Used for superimposition

    #annotated_image=crop # Used for replacement
    
  return [annotated_image, crop]
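
# Note: draw_landmarks_on_image() returns [annotated_image, crop]; when no hand
# is detected the loop body never runs and `crop` stays the empty list, so the
# handler below only classifies after checking RESULT.handedness.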

#-------------------------------------------------------------

# ASL letter labels (A-Z plus '#'); currently unused in this file
letter_list = ["A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R","S","T","U","V","W","X","Y","Z","#"]

# Initialise MediaPipe hand landmark detection
RESULT = None
BaseOptions = mp.tasks.BaseOptions
HandLandmarker = mp.tasks.vision.HandLandmarker
HandLandmarkerOptions = mp.tasks.vision.HandLandmarkerOptions
HandLandmarkerResult = mp.tasks.vision.HandLandmarkerResult
VisionRunningMode = mp.tasks.vision.RunningMode

cbase_options = core.BaseOptions(file_name="./better_exported/model.tflite") # New tflite
ccbase_options = core.BaseOptions(file_name="./exported/model.tflite") # Old tflite

# Initialise ASL tflite model

cclassification_options = processor.ClassificationOptions(max_results=1)
coptions = vision2.ImageClassifierOptions(base_options=cbase_options, classification_options=cclassification_options)
ccoptions = vision2.ImageClassifierOptions(base_options=ccbase_options, classification_options=cclassification_options)
cclassifier = vision2.ImageClassifier.create_from_options(coptions)
ccclassifier = vision2.ImageClassifier.create_from_options(ccoptions)
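
# Ensemble note: two ASL letter classifiers are loaded, a newer model
# (cclassifier) and an older one (ccclassifier). The handler below keeps the
# higher-scoring prediction, then applies per-letter overrides (preferring the
# new model for "P" and the old one for "M", "R" and "T"), apparently to patch
# letters each model handles poorly.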



def print_result(result: HandLandmarkerResult, output_image: mp.Image, timestamp_ms: int):
    # Live-stream callback: stash the latest detection for the request handler
    # to read. (Despite the name, it stores the result rather than printing it.)
    global RESULT
    RESULT = result


options = HandLandmarkerOptions(
    base_options=BaseOptions(model_asset_path='hand_landmarker.task'),
    running_mode=VisionRunningMode.LIVE_STREAM,
    result_callback=print_result)


detector = vision.HandLandmarker.create_from_options(options)
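
# In LIVE_STREAM mode detect_async() returns immediately; results arrive
# asynchronously via the callback above, so a frame may be annotated with the
# detection from a slightly earlier frame.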
video_frames=[]



app = Flask(__name__)


@app.route('/')
def index():
    return render_template('index.html')


@app.route('/api/data', methods=['POST'])
def handle_video_frame():
    frame = request.json.get('key')
    decimg = data_uri_to_image(frame)

#--------------------------------------------
    mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=decimg) # Create MediaPipe image

    # detect_async() returns None; the detection lands in RESULT via the callback
    detector.detect_async(mp_image, mp.Timestamp.from_seconds(time.time()).value)

    global no_hand_flag, frame_time, same_letter_time, letter_result, old_letter_result, char_list, letterscore

    # Fall back to the raw frame if annotation fails (detection results are not
    # available while the model is still initialising)
    final_image = decimg
    try:
        result_images = draw_landmarks_on_image(mp_image.numpy_view(), RESULT) # Array of annotated and cropped images
        annotated_image = result_images[0] 
        cropped_image = result_images[1]

        # Standardise output size, preserving aspect ratio
        h, w = annotated_image.shape[0:2]
        neww = 500
        newh = int(neww * (h / w))
        final_image = cv2.resize(annotated_image, (neww, newh))

        if RESULT.handedness != []:  # Check there is a detection at all before feeding the tflite models
            no_hand_flag = 0

            if RESULT.handedness[0][0].display_name == 'Right':
                tf_image = vision2.TensorImage.create_from_array(cropped_image)
                classification_result = cclassifier.classify(tf_image) # New
                cclassification_result = ccclassifier.classify(tf_image) # Old
                
                result_to_show = classification_result.classifications[0].categories[0].category_name # New
                cresult_to_show = cclassification_result.classifications[0].categories[0].category_name # Old

                if cclassification_result.classifications[0].categories[0].score > classification_result.classifications[0].categories[0].score:
                    letter_result = cresult_to_show  # To implement further UX with Text to Speech
                    cwhich = "Old"
                    # Override: trust the new model for "P"
                    if result_to_show == "P" and cresult_to_show != "P":
                        cwhich = "New"
                        letter_result = result_to_show

                else:
                    letter_result = result_to_show  # To implement further UX with Text to Speech
                    cwhich = "New"
                    # Overrides: trust the old model for "R" and "T", and
                    # (assumed, mirroring those two) for "M"
                    if result_to_show != "M" and cresult_to_show == "M":
                        cwhich = "Old"
                        letter_result = cresult_to_show

                    if result_to_show != "R" and cresult_to_show == "R":
                        cwhich = "Old"
                        letter_result = cresult_to_show

                    if result_to_show != "T" and cresult_to_show == "T":
                        cwhich = "Old"
                        letter_result = cresult_to_show

                if cwhich == "Old":
                    letterscore = cclassification_result.classifications[0].categories[0].score

                if cwhich == "New":
                    letterscore = classification_result.classifications[0].categories[0].score
            else:
                tf_image = vision2.TensorImage.create_from_array(cropped_image)
                classification_result = cclassifier.classify(tf_image) # New
                result_to_show = classification_result.classifications[0].categories[0].category_name # New

                if result_to_show != "B":
                    letter_result='_'
                else:
                    letter_result='>'
    except Exception as e:
        # Expected for early frames: RESULT is None until the landmark model
        # has initialised, and the annotation step then raises. Logging the
        # exception proved useful in debugging.
        print(e)
    frame_data = image_to_data_uri(final_image)
    
    return jsonify({"result": letter_result, "frame": frame_data}), 200

def data_uri_to_image(data_uri):
    header, encoded = data_uri.split(',', 1)
    decoded_data = base64.b64decode(encoded)
    nparr = np.frombuffer(decoded_data, np.uint8)
    image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
    return image 

def image_to_data_uri(image):
    # Encode the image as a JPEG
    _, buffer = cv2.imencode('.jpg', image)
    # Convert the buffer to bytes
    image_bytes = buffer.tobytes()
    # Encode the bytes to Base64
    base64_encoded = base64.b64encode(image_bytes).decode('utf-8')
    # Create the Data URI
    data_uri = f"data:image/jpeg;base64,{base64_encoded}"
    return data_uri
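
# Illustrative client call (a sketch, not part of the app): post a frame as a
# data URI under the JSON key 'key' and read back the predicted letter plus the
# annotated frame. Assumes the third-party `requests` package and a locally
# running server.
#
#   import requests
#   resp = requests.post("http://localhost:7860/api/data",
#                        json={"key": image_to_data_uri(frame)})
#   print(resp.json()["result"])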
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860)