# File size: 8,515 Bytes | 654303c
import os
import time
import numpy as np
import cv2
import mediapipe as mp
from prediction import predict_from_image
from PredictWord import PredictWord, clear_notepad_file
# Directory that holds the header images shown along the top of the window.
Header_path = "Assets/header"
# os.listdir() returns names in arbitrary order, but main() treats the first
# and second image as specific headers, so the listing is sorted to make that
# assignment deterministic. A missing directory yields an empty list, which
# main() already tolerates (it simply shows no header).
myList = sorted(os.listdir(Header_path)) if os.path.isdir(Header_path) else []
# Default capture device; shared with main() and released by the script entry point.
cam = cv2.VideoCapture(0)
# Size (width, height) every captured frame is resized to.
wCam, hCam = 1280, 720
class HandDetector:
    """Wrapper around MediaPipe Hands that tracks landmarks and finger states.

    Typical per-frame use is ``findHands`` -> ``findPosition`` -> ``fingerup``.
    """

    def __init__(self, mode=False, maxHands=2, modelComplexity=1, detectionCon=0.8, trackCon=0.8):
        """Create the MediaPipe Hands solution with the given settings.

        Args:
            mode: Passed to MediaPipe as ``static_image_mode``.
            maxHands: Passed as ``max_num_hands``.
            modelComplexity: Passed as ``model_complexity``.
            detectionCon: Passed as ``min_detection_confidence``.
            trackCon: Passed as ``min_tracking_confidence``.
        """
        self.mode = mode
        self.maxHands = maxHands
        self.modelComplexity = modelComplexity
        self.detectionCon = detectionCon
        self.trackCon = trackCon
        self.mpHands = mp.solutions.hands
        # Keyword arguments keep each setting bound to the intended parameter
        # even if the positional order of Hands() differs between releases.
        self.hands = self.mpHands.Hands(
            static_image_mode=self.mode,
            max_num_hands=self.maxHands,
            model_complexity=self.modelComplexity,
            min_detection_confidence=self.detectionCon,
            min_tracking_confidence=self.trackCon,
        )
        self.mpDraw = mp.solutions.drawing_utils
        # Landmark indices used as the five fingertips, thumb first.
        self.tipIds = [4, 8, 12, 16, 20]
        self.lmList = []
        # Result of the most recent findHands() call; None until one is made.
        self.results = None

    def findHands(self, img):
        """Run hand detection on a BGR frame and draw any detected hands on it.

        Args:
            img: BGR image; it is drawn on in place.

        Returns:
            The same image object, with landmarks and connections drawn.
        """
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.results = self.hands.process(imgRGB)
        if self.results.multi_hand_landmarks:
            for handLms in self.results.multi_hand_landmarks:
                self.mpDraw.draw_landmarks(img, handLms, self.mpHands.HAND_CONNECTIONS)
        return img

    def findPosition(self, img, handNo=0):
        """Return pixel coordinates of every landmark of one detected hand.

        Args:
            img: Image whose ``shape`` (height, width, ...) scales the
                normalised landmark coordinates to pixels.
            handNo: Index of the hand within the latest detection result.

        Returns:
            A list of ``[landmark_index, x, y]`` entries, also stored in
            ``self.lmList``. The list is empty when no detection has been run
            yet, no hand was found, or ``handNo`` does not exist.
        """
        self.lmList = []
        results = getattr(self, "results", None)
        detected = getattr(results, "multi_hand_landmarks", None)
        if not detected:
            return self.lmList
        try:
            hand = detected[handNo]
        except IndexError:
            # Requested hand is not present in this frame.
            return self.lmList
        # Only height and width are needed, so any channel layout is accepted.
        h, w = img.shape[:2]
        for idx, lm in enumerate(hand.landmark):
            self.lmList.append([idx, int(lm.x * w), int(lm.y * h)])
        return self.lmList

    def fingerup(self):
        """Report which fingers are raised, based on ``self.lmList``.

        Returns:
            A list of five 0/1 flags ordered like ``self.tipIds`` (thumb
            first), or an empty list when ``self.lmList`` does not contain
            all the landmarks that need to be compared.
        """
        landmarks = self.lmList
        # The largest index read below is the last fingertip id.
        if len(landmarks) <= max(self.tipIds):
            return []

        # Thumb: compares x of the tip with x of the landmark just before it.
        thumb_tip = self.tipIds[0]
        fingers = [1 if landmarks[thumb_tip][1] < landmarks[thumb_tip - 1][1] else 0]

        # Other fingers: the tip's y is smaller than that of the landmark two
        # indices earlier, i.e. the tip is higher in the image.
        fingers.extend(
            1 if landmarks[tip][2] < landmarks[tip - 2][2] else 0
            for tip in self.tipIds[1:]
        )
        return fingers
def _load_asset(path, size):
    """Read an image and resize it to ``size`` (width, height); None if unreadable."""
    image = cv2.imread(path)
    if image is None:
        return None
    return cv2.resize(image, size)


def _advance_stroke(img, canvas, prev, tip, color):
    """Draw one stroke segment ending at ``tip`` and return the new previous point.

    ``prev == (0, 0)`` means "no previous point", so the segment starts at
    ``tip`` itself. Black is drawn much thicker and also marks the fingertip
    on ``img`` with a filled circle.
    """
    start = tip if prev == (0, 0) else prev
    if color == (0, 0, 0):
        cv2.circle(img, tip, 30, color, cv2.FILLED)
        cv2.line(canvas, start, tip, color, 75)
    else:
        cv2.line(canvas, start, tip, color, 15)
    return tip


def main():
    """Run the interactive drawing / prediction loop until 'q' is pressed.

    Each frame from the module-level ``cam`` is mirrored and passed through a
    ``HandDetector``. Landmark 8 (index fingertip) drives the pointer:

    * index + middle finger up  -> selection: pick a header tool, or a
      right-bar action (clear canvas, Drawing Mode, Prediction Mode);
    * only index finger up      -> draw on the canvas.

    In Prediction Mode the left bar is shown and its regions save the canvas
    to ``Output/`` and run letter or word prediction on the saved image.
    The loop also stops if the camera fails to deliver a frame.
    """
    detector = HandDetector()
    cTime = 0
    drawColor = (0, 0, 255)

    # Header images; unreadable files are skipped.
    overlayList = []
    for impath in myList:
        image = cv2.imread(f'{Header_path}/{impath}')
        if image is not None:
            overlayList.append(image)
    header = cv2.resize(overlayList[0], (1280, 125)) if overlayList else None

    # Sidebars are optional: a missing file leaves the value as None and the
    # corresponding overlay is skipped instead of crashing in cv2.resize.
    RightBar = _load_asset('Assets/sidebar/right.png', (230, 595))
    LeftBar = _load_asset('Assets/sidebar/left.png', (226, 300))

    mode = "Drawing Mode"
    canvas = np.zeros((720, 1280, 3), np.uint8)
    clear_notepad_file(output_dir='output', filename='output.txt')
    # cv2.imwrite does not create directories, so make sure the target exists.
    os.makedirs("Output", exist_ok=True)
    # Previous stroke point; (0, 0) means "no stroke in progress".
    xp, yp = 0, 0

    while True:
        success, img = cam.read()
        if not success or img is None:
            print("Failed to read a frame from the camera; stopping.")
            break
        img = cv2.resize(img, (wCam, hCam))
        img = cv2.flip(img, 1)
        img = detector.findHands(img)
        lmlist = detector.findPosition(img)

        fingers = detector.fingerup() if lmlist else []
        # Landmarks 8 and 12 and finger flags 1 and 2 are read below.
        hand_ready = len(lmlist) > 12 and len(fingers) >= 3
        drawing_gesture = False

        if hand_ready:
            x1, y1 = lmlist[8][1:3]
            x2, y2 = lmlist[12][1:3]
            drawing_gesture = bool(fingers[1]) and not fingers[2]

            # Selection Mode: both index and middle finger up
            if fingers[1] == 1 and fingers[2] == 1:
                xp, yp = 0, 0
                if y1 < 125 and len(overlayList) >= 2:
                    if 0 < x1 < 271:
                        drawColor = (0, 0, 255)
                        header = cv2.resize(overlayList[0], (1280, 125))
                    elif 850 < x1 < 1280:
                        drawColor = (0, 0, 0)
                        header = cv2.resize(overlayList[1], (1280, 125))
                cv2.rectangle(img, (x1, y1 - 25), (x2, y2 + 25), drawColor, cv2.FILLED)

                # Rightbar actions (the 260-385 band is reserved and does nothing)
                if x1 > 1050:
                    if 125 < y1 < 250:
                        canvas = np.zeros((720, 1280, 3), np.uint8)  # Clear canvas
                    elif 385 < y1 < 510:
                        mode = "Drawing Mode"
                    elif 510 < y1 < 635:
                        mode = "Prediction Mode"

            # Drawing Mode: only index finger up
            if drawing_gesture and mode == "Drawing Mode":
                xp, yp = _advance_stroke(img, canvas, (xp, yp), (x1, y1), drawColor)

        if mode == "Prediction Mode":
            if LeftBar is not None:
                img[125:425, 0:226] = LeftBar

            if hand_ready:
                if drawing_gesture:
                    xp, yp = _advance_stroke(img, canvas, (xp, yp), (x1, y1), drawColor)

                # Leftbar actions
                # NOTE(review): these fire on every frame in which the index
                # fingertip is inside a region, regardless of gesture -
                # confirm that this is the intended trigger.
                if x1 < 300:
                    if 150 < y1 < 300:
                        cv2.imwrite("Output/Letter.png", canvas)
                        predicted_letter, _confidence = predict_from_image("Output/Letter.png")
                        cv2.putText(img, f'Predicted Letter: {predicted_letter}', (50, 500),
                                    cv2.FONT_HERSHEY_TRIPLEX, 1, (255, 0, 255), 2)
                    elif 315 < y1 < 405:
                        cv2.imwrite("Output/Word.png", canvas)
                        predictor = PredictWord("Output/Word.png")
                        result = predictor.predict()
                        print("Detected word:", result)
                        PredictWord.save_and_speak_word(result, output_dir='output', filename='output.txt')
                        canvas = np.zeros((720, 1280, 3), np.uint8)

        # Combine canvas and camera image using bitwise operations: pixels
        # where the canvas is brighter than the threshold are blanked in the
        # frame and then filled in from the canvas.
        imgGray = cv2.cvtColor(canvas, cv2.COLOR_BGR2GRAY)
        _, imgInv = cv2.threshold(imgGray, 50, 255, cv2.THRESH_BINARY_INV)
        imgInv = cv2.cvtColor(imgInv, cv2.COLOR_GRAY2BGR)
        img = cv2.bitwise_and(img, imgInv)
        img = cv2.bitwise_or(img, canvas)

        # Calculate FPS (frames per second); guard against a zero interval.
        pTime = time.time()
        elapsed = pTime - cTime
        fps = 1 / elapsed if cTime != 0 and elapsed > 0 else 0
        cTime = pTime

        # Overlay header and RightBar only if they are loaded (robustness)
        if header is not None:
            img[0:125, 0:1280] = header
        if RightBar is not None:
            img[125:720, 1050:1280] = RightBar
        cv2.putText(img, f"Mode : {mode}", (1065, 645), cv2.FONT_HERSHEY_TRIPLEX, 0.5, (255, 0, 255), 1)
        cv2.putText(img, f'FPS: {int(fps)}', (1095, 695), cv2.FONT_HERSHEY_TRIPLEX, 1, (255, 0, 255), 1)
        cv2.imshow("Canvas", canvas)
        cv2.imshow("Image", img)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
if __name__ == "__main__":
    # Release the camera and close the windows even if main() raises, so the
    # capture device is not left locked by a crashed run.
    try:
        main()
    finally:
        cam.release()
        cv2.destroyAllWindows()