# Page header captured with this file (not code): "Spaces: Sleeping"
import pickle

import cv2
import mediapipe as mp
import numpy as np
import tensorflow as tf
from fastapi import WebSocket
# Models and label encoders are loaded once at import time and shared by every
# call to detectFromImageBytes.
#
# NOTE: pickle.load can execute arbitrary code embedded in the file. The
# encoder pickles below must only ever come from a trusted source.

# Letter classifier applied to a single frame, with the encoder that maps its
# class indices back to labels.
lettersModel = tf.keras.models.load_model('ai_model/models/detectLettersModel.keras')
with open('ai_model/models/labelEncoder.pickle', 'rb') as f:
    labelEncoder = pickle.load(f)

# Letter classifier applied to a sequence of frames, with its own encoder.
lettersModel2 = tf.keras.models.load_model('ai_model/jz_model/JZModel.keras')
with open('ai_model/jz_model/labelEncoder.pickle', 'rb') as f:
    labelEncoder2 = pickle.load(f)

# Number classifier applied to a single frame, with its encoder.
numbersModel = tf.keras.models.load_model('ai_model/models/detectNumbersModel.keras')
with open('ai_model/models/numLabelEncoder.pickle', 'rb') as f:
    numLabelEncoder = pickle.load(f)

# Shared MediaPipe hand-landmark detector, configured for independent still
# images rather than a tracked video stream.
hands = mp.solutions.hands.Hands(static_image_mode=True)
# Predictions whose top probability is below this value are reported as ''.
_MIN_CONFIDENCE = 0.6
# Letters that are confirmed by the sequence model instead of a single frame.
_DYNAMIC_LETTERS = ('J', 'Z')
# Number of leading frames fed to the sequence model.
_DYNAMIC_SEQUENCE_LENGTH = 10


def _emptyResult():
    """Return the payload used when no letter or number could be detected."""
    return {'letter': '', 'confidenceLetter': 0.0, 'number': '', 'confidenceNumber': 0.0}


def _handOffsets(imageBytes, padZ=False):
    """Decode one encoded image and return the first hand's landmark offsets.

    Every landmark contributes its x and y, shifted so that the smallest x and
    the smallest y of the hand become 0. With ``padZ`` a constant 0 is added
    after each (x, y) pair, giving three values per landmark.

    Returns None when the bytes cannot be decoded or no hand is detected.
    """
    # An empty payload cannot contain an image; skip it instead of handing
    # OpenCV an empty buffer.
    if not imageBytes:
        return None
    image = cv2.imdecode(np.frombuffer(imageBytes, np.uint8), cv2.IMREAD_COLOR)
    if image is None:
        return None
    results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    if not results.multi_hand_landmarks:
        return None

    landmarks = results.multi_hand_landmarks[0].landmark
    # Compute the minima once instead of once per landmark.
    minX = min(lm.x for lm in landmarks)
    minY = min(lm.y for lm in landmarks)
    offsets = []
    for lm in landmarks:
        offsets.append(lm.x - minX)
        offsets.append(lm.y - minY)
        if padZ:
            offsets.append(0)
    return offsets


def _predictLabel(model, encoder, inputData):
    """Return (label, confidence) for the model's top class.

    ``label`` is '' when the confidence is below ``_MIN_CONFIDENCE``.
    """
    prediction = model.predict(inputData, verbose=0)
    index = np.argmax(prediction, axis=1)[0]
    confidence = float(np.max(prediction))
    label = encoder.inverse_transform([index])[0] if confidence >= _MIN_CONFIDENCE else ''
    return label, confidence


def _processSingleFrame(imageBytes):
    """Classify one frame with the single-frame letter and number models.

    Returns (letter, letterConfidence, number, numberConfidence), or four
    Nones when the frame yields no hand landmarks.
    """
    offsets = _handOffsets(imageBytes)
    if offsets is None:
        return None, None, None, None
    inputData = np.array(offsets, dtype=np.float32).reshape(1, 42, 1)
    letter, letterConfidence = _predictLabel(lettersModel, labelEncoder, inputData)
    number, numberConfidence = _predictLabel(numbersModel, numLabelEncoder, inputData)
    print(f'Letters Model 1: {letter or "None"} at {letterConfidence}')
    print(f'Numbers Model: {number or "None"} at {numberConfidence}')
    return letter, letterConfidence, number, numberConfidence


def _fillMissingFrames(sequence):
    """Replace None entries of ``sequence`` in place using neighbouring frames.

    A gap between two usable frames is filled by linear interpolation; a gap
    at the start or end copies the nearest usable frame. Entries stay None
    only when no frame in the sequence is usable.
    """
    for i, entry in enumerate(sequence):
        if entry is not None:
            continue
        prevIdx = next((j for j in range(i - 1, -1, -1) if sequence[j] is not None), -1)
        nextIdx = next((j for j in range(i + 1, len(sequence)) if sequence[j] is not None), -1)
        if prevIdx != -1 and nextIdx != -1:
            prevData = np.array(sequence[prevIdx])
            nextData = np.array(sequence[nextIdx])
            t = (i - prevIdx) / (nextIdx - prevIdx)
            sequence[i] = (prevData + (nextData - prevData) * t).tolist()
        elif prevIdx != -1:
            sequence[i] = sequence[prevIdx]
        elif nextIdx != -1:
            sequence[i] = sequence[nextIdx]


def _processSequence(frames):
    """Classify a run of frames with the sequence letter model.

    Returns (letter, confidence), or (None, None) when no frame in the run
    produced hand landmarks.
    """
    sequence = [_handOffsets(imageBytes, padZ=True) for imageBytes in frames]
    _fillMissingFrames(sequence)
    if any(entry is None for entry in sequence):
        print("Incomplete sequence after interpolation")
        return None, None
    inputData = np.array(sequence, dtype=np.float32).reshape(1, len(frames), 63)
    letter, confidence = _predictLabel(lettersModel2, labelEncoder2, inputData)
    print(f'Letters Model 2: {letter or "None"} at {confidence}')
    return letter, confidence


async def detectFromImageBytes(sequenceBytesList, websocket: WebSocket = None, isDynamic=False):
    """Detect a hand-sign letter and number from one or more encoded frames.

    Args:
        sequenceBytesList: Encoded image frames (bytes), oldest first.
        websocket: Accepted for interface compatibility; not used here.
        isDynamic: When true and at least 10 frames are supplied, the first
            10 frames are also run through the sequence letter model.

    Returns:
        One of:
        * ``{'status': 'waitMore'}`` or ``{'status': 'waitMoreDynamic'}``
          when more frames are needed before a decision;
        * ``{'letter', 'confidenceLetter', 'number', 'confidenceNumber'}``
          with ``''`` / ``0.0`` for anything that was not detected;
        * ``None`` for frame counts no branch handles (3-9 frames, or 10 or
          more without ``isDynamic``).

    NOTE(review): every model and MediaPipe call below is synchronous, so
    this coroutine never yields to the event loop while it runs.
    """
    numFrames = len(sequenceBytesList)
    if numFrames == 0:
        return _emptyResult()

    if numFrames == 1:
        label1, _, _, _ = _processSingleFrame(sequenceBytesList[0])
        if label1 is None:
            return _emptyResult()
        if label1 in _DYNAMIC_LETTERS:
            return {'status': 'waitMoreDynamic'}
        return {'status': 'waitMore'}

    if numFrames == 2:
        label1First, _, _, _ = _processSingleFrame(sequenceBytesList[0])
        label1Second, confidence1, label3, confidence3 = _processSingleFrame(sequenceBytesList[1])
        if label1First is None or label1Second is None:
            return _emptyResult()
        # Two consecutive frames agreeing on a static letter confirm it.
        if (label1First == label1Second and label1First not in _DYNAMIC_LETTERS
                and confidence1 >= _MIN_CONFIDENCE):
            return {'letter': label1Second, 'confidenceLetter': confidence1,
                    'number': label3, 'confidenceNumber': confidence3}
        if label1First in _DYNAMIC_LETTERS or label1Second in _DYNAMIC_LETTERS:
            return {'status': 'waitMoreDynamic'}
        return _emptyResult()

    if numFrames >= _DYNAMIC_SEQUENCE_LENGTH and isDynamic:
        label1, confidence1, _, _ = _processSingleFrame(sequenceBytesList[0])
        label2, confidence2 = _processSequence(sequenceBytesList[:_DYNAMIC_SEQUENCE_LENGTH])
        if label2 is None:
            return _emptyResult()
        _, _, label3, confidence3 = _processSingleFrame(sequenceBytesList[-1])

        # A frame with no detected hand yields Nones; report them as '' / 0.0
        # like every other "nothing detected" path instead of leaking None.
        if label1 is None:
            label1, confidence1 = '', 0.0
        if label3 is None:
            label3, confidence3 = '', 0.0

        # NOTE(review): the source's indentation was lost. This nesting is
        # the only one in which every return is reachable and label3 is
        # defined - confirm against the original file.
        if confidence2 >= _MIN_CONFIDENCE:
            # The sequence model only overrides the first frame's letter when
            # that frame read 'I' and the sequence reads 'J'.
            if label1 == 'I' and label2 == 'J':
                return {'letter': label2, 'confidenceLetter': confidence2,
                        'number': label3, 'confidenceNumber': confidence3}
            return {'letter': label1, 'confidenceLetter': confidence1,
                    'number': label3, 'confidenceNumber': confidence3}
        return {'letter': '', 'confidenceLetter': 0.0, 'number': label3, 'confidenceNumber': confidence3}

    # Any other frame count produces no result, as before.
    return None