Spaces:
Sleeping
Sleeping
File size: 7,476 Bytes
ae78832 10fc241 ae78832 10fc241 ae78832 10fc241 ae78832 812faa8 ae78832 812faa8 ae78832 812faa8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 |
import cv2
import numpy as np
import pickle
import tensorflow as tf
import mediapipe as mp
from fastapi import WebSocket
lettersModel = tf.keras.models.load_model('ai_model/models/detectLettersModel.keras')
with open('ai_model/models/labelEncoder.pickle', 'rb') as f:
labelEncoder = pickle.load(f)
lettersModel2 = tf.keras.models.load_model('ai_model/jz_model/JZModel.keras')
with open('ai_model/jz_model/labelEncoder.pickle', 'rb') as f:
labelEncoder2 = pickle.load(f)
numbersModel = tf.keras.models.load_model('ai_model/models/detectNumbersModel.keras')
with open('ai_model/models/numLabelEncoder.pickle', 'rb') as f:
numLabelEncoder = pickle.load(f)
hands = mp.solutions.hands.Hands(static_image_mode=True)
async def detectFromImageBytes(sequenceBytesList, websocket: WebSocket = None, isDynamic=False):
numFrames = len(sequenceBytesList)
if numFrames == 0:
return {'letter': '', 'confidenceLetter': 0.0, 'number': '', 'confidenceNumber': 0.0}
def processSingleFrame(imageBytes):
nparr = np.frombuffer(imageBytes, np.uint8)
image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
if image is None:
return None, None, None, None
imgRGB = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
results = hands.process(imgRGB)
if not results.multi_hand_landmarks:
return None, None, None, None
handLandmarks = results.multi_hand_landmarks[0]
xList, yList = [], []
dataAux = []
for lm in handLandmarks.landmark:
xList.append(lm.x)
yList.append(lm.y)
for lm in handLandmarks.landmark:
dataAux.append(lm.x - min(xList))
dataAux.append(lm.y - min(yList))
inputData = np.array(dataAux, dtype=np.float32).reshape(1, 42, 1)
prediction1 = lettersModel.predict(inputData, verbose=0)
index1 = np.argmax(prediction1, axis=1)[0]
confidence1 = float(np.max(prediction1))
label1 = labelEncoder.inverse_transform([index1])[0] if confidence1 >= 0.6 else ''
prediction3 = numbersModel.predict(inputData, verbose=0)
index3 = np.argmax(prediction3, axis=1)[0]
confidence3 = float(np.max(prediction3))
label3 = numLabelEncoder.inverse_transform([index3])[0] if confidence3 >= 0.6 else ''
print(f'Letters Model 1: {label1 or "None"} at {confidence1}')
print(f'Numbers Model: {label3 or "None"} at {confidence3}')
return label1, confidence1, label3, confidence3
def processSequence(frames):
processedSequence = []
for imageBytes in frames:
nparr = np.frombuffer(imageBytes, np.uint8)
image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
if image is None:
processedSequence.append(None)
continue
imgRGB = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
results = hands.process(imgRGB)
if not results.multi_hand_landmarks:
processedSequence.append(None)
continue
handLandmarks = results.multi_hand_landmarks[0]
xList, yList = [], []
dataAux2 = []
for lm in handLandmarks.landmark:
xList.append(lm.x)
yList.append(lm.y)
for lm in handLandmarks.landmark:
dataAux2.append(lm.x - min(xList))
dataAux2.append(lm.y - min(yList))
dataAux2.append(0)
processedSequence.append(dataAux2)
for i in range(len(processedSequence)):
if processedSequence[i] is None:
prevIdx, nextIdx = -1, -1
for j in range(i - 1, -1, -1):
if processedSequence[j] is not None:
prevIdx = j
break
for j in range(i + 1, len(processedSequence)):
if processedSequence[j] is not None:
nextIdx = j
break
if prevIdx != -1 and nextIdx != -1:
prevData = np.array(processedSequence[prevIdx])
nextData = np.array(processedSequence[nextIdx])
t = (i - prevIdx) / (nextIdx - prevIdx)
interpolatedData = prevData + (nextData - prevData) * t
processedSequence[i] = interpolatedData.tolist()
elif prevIdx != -1:
processedSequence[i] = processedSequence[prevIdx]
elif nextIdx != -1:
processedSequence[i] = processedSequence[nextIdx]
if None in processedSequence:
print("Incomplete sequence after interpolation")
return None, None
inputData2 = np.array(processedSequence, dtype=np.float32).reshape(1, len(frames), 63)
prediction2 = lettersModel2.predict(inputData2, verbose=0)
index2 = np.argmax(prediction2, axis=1)[0]
confidence2 = float(np.max(prediction2))
label2 = labelEncoder2.inverse_transform([index2])[0] if confidence2 >= 0.6 else ''
print(f'Letters Model 2: {label2 or "None"} at {confidence2}')
return label2, confidence2
if numFrames == 1:
label1, confidence1, label3, confidence3 = processSingleFrame(sequenceBytesList[0])
if label1 is None:
return {'letter': '', 'confidenceLetter': 0.0, 'number': '', 'confidenceNumber': 0.0}
if label1 in ['J', 'Z']:
return {'status': 'waitMoreDynamic'}
return {'status': 'waitMore'}
elif numFrames == 2:
label1First, _, _, _ = processSingleFrame(sequenceBytesList[0])
label1Second, confidence1, label3, confidence3 = processSingleFrame(sequenceBytesList[1])
if label1First is None or label1Second is None:
return {'letter': '', 'confidenceLetter': 0.0, 'number': '', 'confidenceNumber': 0.0}
if label1First == label1Second and label1First not in ['J', 'Z'] and confidence1 >= 0.6:
return {'letter': label1Second, 'confidenceLetter': confidence1,
'number': label3, 'confidenceNumber': confidence3}
elif label1First in ['J', 'Z'] or label1Second in ['J', 'Z']:
return {'status': 'waitMoreDynamic'}
else:
return {'letter': '', 'confidenceLetter': 0.0, 'number': '', 'confidenceNumber': 0.0}
elif numFrames >= 10 and isDynamic:
label1, confidence1, _, _ = processSingleFrame(sequenceBytesList[0])
label2, confidence2 = processSequence(sequenceBytesList[:10])
if label2 is None:
return {'letter': '', 'confidenceLetter': 0.0, 'number': '', 'confidenceNumber': 0.0}
_, _, label3, confidence3 = processSingleFrame(sequenceBytesList[-1])
if confidence2 >= 0.6:
if label1 == 'I':
if label2 == 'J':
return {'letter': label2, 'confidenceLetter': confidence2,
'number': label3, 'confidenceNumber': confidence3}
else:
return {'letter': label1, 'confidenceLetter': confidence1,
'number': label3, 'confidenceNumber': confidence3}
return {'letter': label1, 'confidenceLetter': confidence1,
'number': label3, 'confidenceNumber': confidence3}
return {'letter': '', 'confidenceLetter': 0.0, 'number': label3, 'confidenceNumber': confidence3} |