import base64
import time

import cv2
import numpy as np
from numpy.linalg import norm
from flask import Flask, render_template, request, jsonify

import mediapipe as mp
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2
from mediapipe.tasks.python import vision

from tflite_support.task import vision as vision2
from tflite_support.task import core
from tflite_support.task import processor
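# Flask back end for a browser-based ASL fingerspelling demo: the client POSTs
# webcam frames as data URIs, MediaPipe locates the hand, two tflite letter
# classifiers run on the cropped hand region, and the annotated frame plus the
# predicted letter are returned as JSON.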
# Image annotation utils and module-level state shared with the request handler
char_list = []
letter_result = 0        # most recent classified letter, returned to the client
old_letter_result = 0
MARGIN = 10              # pixels
FONT_SIZE = 1
FONT_THICKNESS = 1
HANDEDNESS_TEXT_COLOR = (88, 205, 54)  # vibrant green
result_to_show = 0
cresult_to_show = 0
text_x = 0               # text anchor, currently unused
text_y = 0
cwhich = 0               # which classifier ("Old"/"New") produced the letter
lastwidth = 400
letterscore = 0          # confidence score of the chosen classifier
frame_time = 0
same_letter_time = 0
no_hand_flag = 1
# UTILS
def brightness(img):
    if len(img.shape) == 3:
        # Coloured RGB or BGR (*do not* use HSV images with this function):
        # brightness as the average per-pixel Euclidean norm of the channels
        return np.average(norm(img, axis=2)) / np.sqrt(3)
    else:
        # Grayscale
        return np.average(img)
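# Hypothetical usage sketch (not wired up in this app): brightness() could gate
# classification on lighting, e.g. skipping frames with brightness(decimg) < 40
# in the request handler; the threshold 40 is an assumption, not a value from
# this project.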
def draw_landmarks_on_image(rgb_image, detection_result):
    hand_landmarks_list = detection_result.hand_landmarks
    handedness_list = detection_result.handedness
    annotated_image = np.copy(rgb_image)
    crop = []
    image_height, image_width, _channels = annotated_image.shape
    # Loop through the detected hands to visualise
    for idx in range(len(hand_landmarks_list)):
        hand_landmarks = hand_landmarks_list[idx]
        handedness = handedness_list[idx]
        # Draw the hand landmarks
        hand_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
        hand_landmarks_proto.landmark.extend([
            landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z)
            for landmark in hand_landmarks
        ])
        solutions.drawing_utils.draw_landmarks(
            annotated_image,
            hand_landmarks_proto,
            solutions.hands.HAND_CONNECTIONS,
            solutions.drawing_styles.get_default_hand_landmarks_style(),
            solutions.drawing_styles.get_default_hand_connections_style())
        # Get bounding box
        height, width, _ = annotated_image.shape
        x_coordinates = [landmark.x for landmark in hand_landmarks]
        y_coordinates = [landmark.y for landmark in hand_landmarks]
        min_x = int(min(x_coordinates) * width)   # Left
        min_y = int(min(y_coordinates) * height)  # Top
        max_x = int(max(x_coordinates) * width)   # Right
        max_y = int(max(y_coordinates) * height)  # Bottom
        # Get dimensions of the bounding box
        sect_height = max_y - min_y
        sect_width = max_x - min_x
        # Get centre of the bounding box
        center_x = (min_x + max_x) / 2
        center_y = (min_y + max_y) / 2
        # Define dominant axis for aspect ratio so the crop stays square
        sect_diameter = 50
        if sect_height > sect_width:
            sect_diameter = sect_height
        if sect_height < sect_width:
            sect_diameter = sect_width
        sect_diameter = sect_diameter + 50    # Pad diameter
        sect_radius = int(sect_diameter / 2)  # Find radius
        # Crop image
        crop_top = int(center_y - sect_radius)     # Top boundary
        crop_bottom = int(center_y + sect_radius)  # Bottom boundary
        crop_left = int(center_x - sect_radius)    # Left boundary
        crop_right = int(center_x + sect_radius)   # Right boundary
        # Clamp the crop to the canvas
        if crop_top < 0:  # Bounding box too high
            crop_top = 0
        if crop_left < 0:  # Bounding box too far left
            crop_left = 0
        if crop_right > image_width:  # Bounding box too far right
            crop_right = image_width
        if crop_bottom > image_height:  # Bounding box too low
            crop_bottom = image_height
        # Trace bounding box
        annotated_image = cv2.rectangle(annotated_image, (crop_left, crop_top), (crop_right, crop_bottom), (255, 0, 0), 6)
        global text_x
        global text_y
        # For text, currently not used
        text_x = crop_left
        text_y = crop_top
        # Get cropped image
        crop = annotated_image[crop_top:crop_bottom, crop_left:crop_right]
        # Scale cropped image to a fixed 150 px width, preserving aspect ratio
        h, w = crop.shape[0:2]
        neww = 150
        newh = int(neww * (h / w))
        crop = cv2.resize(crop, (neww, newh))
        # annotated_image[0:crop.shape[0], 0:crop.shape[1]] = crop  # Used for superimposition
        # annotated_image = crop  # Used for replacement
    return [annotated_image, crop]
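# draw_landmarks_on_image() returns [annotated_frame, crop]: the full frame with
# landmarks and a bounding box drawn on, plus the padded hand region resized to
# 150 px wide for the letter classifiers. When no hand is detected, crop stays
# an empty list, so callers must be prepared for that.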
#-------------------------------------------------------------
# Letter List
letter_list = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "#"]
# Initialise MediaPipe hand landmark detection
RESULT = None
BaseOptions = mp.tasks.BaseOptions
HandLandmarker = mp.tasks.vision.HandLandmarker
HandLandmarkerOptions = mp.tasks.vision.HandLandmarkerOptions
HandLandmarkerResult = mp.tasks.vision.HandLandmarkerResult
VisionRunningMode = mp.tasks.vision.RunningMode
# Initialise the two ASL tflite letter classifiers
cbase_options = core.BaseOptions(file_name="./better_exported/model.tflite")  # New tflite
ccbase_options = core.BaseOptions(file_name="./exported/model.tflite")        # Old tflite
cclassification_options = processor.ClassificationOptions(max_results=1)
coptions = vision2.ImageClassifierOptions(base_options=cbase_options, classification_options=cclassification_options)
ccoptions = vision2.ImageClassifierOptions(base_options=ccbase_options, classification_options=cclassification_options)
cclassifier = vision2.ImageClassifier.create_from_options(coptions)    # New
ccclassifier = vision2.ImageClassifier.create_from_options(ccoptions)  # Old
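# Both classifiers are kept deliberately: the request handler below runs the
# two on every right-hand crop, keeps the higher-scoring prediction by default,
# and overrides that choice for specific letters (P, M, R, T) where one model
# is preferred over the other.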
def print_result(result: HandLandmarkerResult, output_image: mp.Image, timestamp_ms: int):
    # Live-stream callback: stash the latest detection result in a global
    global RESULT
    RESULT = result

options = HandLandmarkerOptions(
    base_options=BaseOptions(model_asset_path='hand_landmarker.task'),
    running_mode=VisionRunningMode.LIVE_STREAM,
    result_callback=print_result)
detector = vision.HandLandmarker.create_from_options(options)
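# In LIVE_STREAM mode detect_async() returns immediately and delivers results
# through print_result(), so the handler below reads the (slightly stale)
# global RESULT instead of a return value.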
video_frames = []
app = Flask(__name__)

@app.route('/')
def index():
    return render_template('index.html')
@app.route('/api/data', methods=['POST'])
def handle_video_frame():
    frame = request.json.get('key')  # frame arrives as a base64 data URI
    decimg = data_uri_to_image(frame)
    # --------------------------------------------
    mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=decimg)  # Create MediaPipe image
    detector.detect_async(mp_image, mp.Timestamp.from_seconds(time.time()).value)  # detect; the result arrives via the callback
    global no_hand_flag, frame_time, same_letter_time, letter_result, old_letter_result, char_list, letterscore
    final_image = decimg  # fallback so the response stays valid if annotation fails
    # try/except block, because no detection result exists yet while the model is initialising
    try:
        result_images = draw_landmarks_on_image(mp_image.numpy_view(), RESULT)  # Annotated and cropped images
        annotated_image = result_images[0]
        cropped_image = result_images[1]
        # Standardise the output shape by resizing to a fixed 500 px width
        h, w = annotated_image.shape[0:2]
        neww = 500
        newh = int(neww * (h / w))
        resized_image = cv2.resize(annotated_image, (neww, newh))
        final_image = resized_image
        if RESULT.handedness != []:  # Check there is any result at all before feeding the tflite models
            no_hand_flag = 0
            if RESULT.handedness[0][0].display_name == 'Right':
                tf_image = vision2.TensorImage.create_from_array(cropped_image)
                classification_result = cclassifier.classify(tf_image)    # New
                cclassification_result = ccclassifier.classify(tf_image)  # Old
                result_to_show = classification_result.classifications[0].categories[0].category_name    # New
                cresult_to_show = cclassification_result.classifications[0].categories[0].category_name  # Old
                if cclassification_result.classifications[0].categories[0].score > classification_result.classifications[0].categories[0].score:
                    letter_result = cresult_to_show  # To implement further UX with text to speech
                    cwhich = "Old"
                    if result_to_show == "P" and cresult_to_show != "P":  # Prefer the new model for P
                        cwhich = "New"
                        letter_result = result_to_show
                else:
                    letter_result = result_to_show  # To implement further UX with text to speech
                    cwhich = "New"
                    if result_to_show != "M" and cresult_to_show == "M":  # Prefer the old model for M
                        cwhich = "Old"
                        letter_result = cresult_to_show
                    if result_to_show != "R" and cresult_to_show == "R":  # Prefer the old model for R
                        cwhich = "Old"
                        letter_result = cresult_to_show
                    if result_to_show != "T" and cresult_to_show == "T":  # Prefer the old model for T
                        cwhich = "Old"
                        letter_result = cresult_to_show
                if cwhich == "Old":
                    letterscore = cclassification_result.classifications[0].categories[0].score
                if cwhich == "New":
                    letterscore = classification_result.classifications[0].categories[0].score
            else:  # Not a right hand: map a left-hand "B" to '>' and everything else to '_'
                tf_image = vision2.TensorImage.create_from_array(cropped_image)
                classification_result = cclassifier.classify(tf_image)  # New
                result_to_show = classification_result.classifications[0].categories[0].category_name  # New
                if result_to_show != "B":
                    letter_result = '_'
                else:
                    letter_result = '>'
    except Exception as e:
        # Ha! The catch err { throw err } scenario -- it was actually quite useful for debugging, though
        print(e)
    frame_data = image_to_data_uri(final_image)
    return jsonify({"result": letter_result, "frame": frame_data}), 200
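# Request/response shape, inferred from the handler above (values abridged):
#   POST /api/data  {"key": "data:image/jpeg;base64,..."}
#   -> 200          {"result": "A", "frame": "data:image/jpeg;base64,..."}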
def data_uri_to_image(data_uri):
    # Decode a "data:image/...;base64,..." URI into a BGR OpenCV image
    header, encoded = data_uri.split(',', 1)
    decoded_data = base64.b64decode(encoded)
    nparr = np.frombuffer(decoded_data, np.uint8)
    image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
    return image

def image_to_data_uri(image):
    # Encode the image as a JPEG
    _, buffer = cv2.imencode('.jpg', image)
    # Convert the buffer to bytes
    image_bytes = buffer.tobytes()
    # Encode the bytes to Base64
    base64_encoded = base64.b64encode(image_bytes).decode('utf-8')
    # Create the data URI
    data_uri = f"data:image/jpeg;base64,{base64_encoded}"
    return data_uri
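# The two helpers are inverses up to JPEG compression; a round-trip sketch with
# a hypothetical image:
#   img = data_uri_to_image(image_to_data_uri(np.zeros((10, 10, 3), np.uint8)))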
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860)