itsgokul02 commited on
Commit
654303c
·
verified ·
1 Parent(s): 3199a6c

Initial commit

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ dataset/train.csv filter=lfs diff=lfs merge=lfs -text
PredictWord.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import pytesseract
3
+ import pyttsx3
4
+ import os
5
+ import subprocess
6
+ import time
7
+
8
+ pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
9
+
10
class PredictWord:
    """OCR a drawn/handwritten word from an image file using Tesseract."""

    def __init__(self, image_path):
        # Path of the image predict() will read with cv2.imread.
        self.image_path = image_path

    def predict(self):
        """Return the recognized text, stripped, or None if the image can't be read."""
        image = cv2.imread(self.image_path)
        if image is None:
            print(f"Error: Image not found at '{self.image_path}'")
            return None

        # Convert to gray -- Tesseract performs better on single-channel input.
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # --oem 3: default OCR engine; --psm 6: assume a uniform block of text.
        custom_config = r'--oem 3 --psm 6'
        word = pytesseract.image_to_string(gray, config=custom_config)
        return word.strip()

    @staticmethod
    def save_and_speak_word(word, output_dir='output', filename='output.txt'):
        """Write *word* to a text file, open it in Notepad, and speak it aloud.

        Fix: predict() can return None (and OCR can yield ''); previously
        `word + '\\n\\n'` raised TypeError on None. Empty/None words are now
        ignored instead of crashing the UI loop.
        """
        if not word:
            return
        # Ensure output directory exists
        os.makedirs(output_dir, exist_ok=True)
        file_path = os.path.abspath(os.path.join(output_dir, filename))

        # Write word to file; trailing blank line separates successive words.
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(word + '\n\n')

        # Open file in Notepad and give it a moment before speech starts.
        subprocess.Popen(['notepad.exe', file_path])
        time.sleep(1)

        # Speak the word
        engine = pyttsx3.init()
        engine.say(word)
        engine.runAndWait()
46
def clear_notepad_file(output_dir='output', filename='output.txt'):
    """Truncate the output text file so a new session starts empty."""
    target = os.path.abspath(os.path.join(output_dir, filename))
    # 'w' mode truncates on open; the explicit empty write mirrors the
    # original behavior exactly.
    with open(target, 'w', encoding='utf-8') as handle:
        handle.write('')
ValidationChecker.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ from torch.utils.data import DataLoader, Dataset
4
+ from torchvision import transforms
5
+ from PIL import Image
6
+ import pandas as pd
7
+ import io
8
+ import ast
9
+ import os
10
+ from sklearn.metrics import confusion_matrix, classification_report
11
+ import timm
12
+
13
class EfficientNetB0Alpha(nn.Module):
    """Single-channel EfficientNet-B0 classifier built on a timm backbone."""

    def __init__(self, num_classes=26):
        super().__init__()
        # in_chans=1 for grayscale input; classifier head sized to num_classes.
        self.model = timm.create_model(
            'efficientnet_b0', pretrained=True, in_chans=1, num_classes=num_classes
        )

    def forward(self, x):
        # Delegate the forward pass entirely to the timm model.
        return self.model(x)
20
class Dataset(Dataset):
    """CSV-backed image dataset.

    Each row holds the image as a dict (or its ``repr`` string) with a
    'bytes' key containing the encoded image, plus a class label.

    NOTE(review): this class shadows ``torch.utils.data.Dataset`` by name;
    kept as-is because main() in this file refers to it by this name.
    """

    def __init__(self, csv_path, transform=None, image_col='image', label_col='label'):
        self.data = pd.read_csv(csv_path)
        self.transform = transform
        self.image_col = image_col
        self.label_col = label_col

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_data = self.data.iloc[idx][self.image_col]
        label = self.data.iloc[idx][self.label_col]
        # Fix (consistency with cnn.py's Dataset): wrap parsing/decoding so a
        # malformed row fails with the offending index instead of a raw
        # ast/KeyError deep inside a DataLoader worker.
        if isinstance(img_data, str):
            try:
                img_dict = ast.literal_eval(img_data)
                img_bytes = img_dict['bytes']
            except (ValueError, SyntaxError, KeyError) as e:
                raise ValueError(f"Error parsing image data at index {idx}: {e}")
        else:
            img_bytes = img_data['bytes']
        try:
            img = Image.open(io.BytesIO(img_bytes)).convert('L')
        except Exception as e:
            raise ValueError(f"Error decoding image at index {idx}: {e}")
        if self.transform:
            img = self.transform(img)
        return img, label
41
def load_model(model_path, num_classes, device):
    """Restore a trained EfficientNetB0Alpha checkpoint, in eval mode.

    Raises FileNotFoundError when the checkpoint file is missing.
    """
    if not os.path.exists(model_path):
        raise FileNotFoundError(f"Model file not found at {model_path}")
    net = EfficientNetB0Alpha(num_classes=num_classes)
    # weights_only=True restricts unpickling to tensors/containers (safer load).
    state = torch.load(model_path, map_location=device, weights_only=True)
    net.load_state_dict(state['model_state_dict'])
    net.to(device)
    net.eval()
    return net
51
def evaluate(model, test_loader, device, class_names):
    """Score *model* on *test_loader*; print accuracy, a per-class report,
    and a confusion matrix. Returns (accuracy_percent, confusion_matrix)."""
    model.eval()
    n_correct = 0
    n_total = 0
    preds_seen = []
    labels_seen = []
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            logits = model(images)
            _, guesses = torch.max(logits.data, 1)
            n_total += labels.size(0)
            n_correct += (guesses == labels).sum().item()
            preds_seen.extend(guesses.cpu().numpy())
            labels_seen.extend(labels.cpu().numpy())
    # max(..., 1) guards against an empty loader (no ZeroDivisionError).
    accuracy = 100 * n_correct / max(n_total, 1)
    print(f"Test Accuracy: {accuracy:.2f}%")
    print("\nClassification Report:")
    print(classification_report(labels_seen, preds_seen, target_names=class_names, digits=2))
    cm = confusion_matrix(labels_seen, preds_seen)
    print("\nConfusion Matrix (True Labels: rows, Predicted Labels: columns):")
    print(pd.DataFrame(cm, index=class_names, columns=class_names))
    return accuracy, cm
73
def main():
    """Evaluate the saved checkpoint on the held-out test CSV."""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    num_classes = 26
    model_path = "saved_models/best_model.pth"
    test_csv = "dataset/test.csv"
    batch_size = 32
    print("Device being used:", device)

    # Deterministic preprocessing -- must mirror the training-time val transform.
    test_transform = transforms.Compose([
        transforms.Grayscale(num_output_channels=1),
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5], std=[0.5]),
    ])

    test_dataset = Dataset(test_csv, transform=test_transform)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False,
                             num_workers=4, pin_memory=True)
    class_names = [chr(65 + i) for i in range(26)]  # 'A'..'Z'

    model = load_model(model_path, num_classes, device)
    evaluate(model, test_loader, device, class_names)

if __name__ == "__main__":
    main()
cnn.py ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.optim as optim
4
+ from torch.utils.data import DataLoader, Dataset
5
+ from torchvision import transforms
6
+ from PIL import Image
7
+ import pandas as pd
8
+ import io
9
+ import ast
10
+ import timm
11
+ import os
12
+
13
+ # Model
14
# Model
class EfficientNetB0Alpha(nn.Module):
    """EfficientNet-B0 (timm) adapted to grayscale input for letter classification."""

    def __init__(self, num_classes=26):
        super().__init__()
        # Pretrained backbone; first conv rebuilt for 1 input channel.
        self.model = timm.create_model(
            'efficientnet_b0', pretrained=True, in_chans=1, num_classes=num_classes
        )

    def forward(self, x):
        return self.model(x)
22
+ # Dataset
23
# Dataset
class Dataset(Dataset):
    """CSV-backed image dataset where each row stores the encoded image as a
    dict (or its repr string) with a 'bytes' key, plus an integer label."""

    def __init__(self, csv_path, transform=None, image_col='image', label_col='label'):
        self.data = pd.read_csv(csv_path)
        self.transform = transform
        self.image_col = image_col
        self.label_col = label_col

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]
        img_data = row[self.image_col]
        label = row[self.label_col]
        # Rows may be dicts or their string repr (depends on how the CSV was
        # written); both carry the raw image under 'bytes'.
        if isinstance(img_data, str):
            try:
                img_bytes = ast.literal_eval(img_data)['bytes']
            except (ValueError, SyntaxError, KeyError) as e:
                raise ValueError(f"Error parsing image data at index {idx}: {e}")
        else:
            img_bytes = img_data['bytes']
        try:
            img = Image.open(io.BytesIO(img_bytes)).convert('L')
        except Exception as e:
            raise ValueError(f"Error decoding image at index {idx}: {e}")
        if self.transform:
            img = self.transform(img)
        return img, label
52
+ # Training function
53
# Training function
def train(model, train_loader, optimizer, criterion, scheduler, device):
    """Run one training epoch; returns (mean_batch_loss, accuracy_percent)."""
    model.train()
    running_loss = 0.0
    n_correct = 0
    n_seen = 0
    for data, targets in train_loader:
        data = data.to(device)
        targets = targets.to(device)
        optimizer.zero_grad()
        outputs = model(data)
        loss = criterion(outputs, targets)
        loss.backward()
        # Clip exploding gradients before stepping.
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        # OneCycleLR is stepped per batch, not per epoch.
        if scheduler is not None:
            scheduler.step()
        running_loss += loss.item()
        _, predicted = outputs.max(1)
        n_correct += predicted.eq(targets).sum().item()
        n_seen += targets.size(0)
    return running_loss / len(train_loader), 100. * n_correct / n_seen
74
+ # Validation function
75
# Validation function
def val(model, val_loader, criterion, device):
    """Evaluate one full pass; returns (mean_batch_loss, accuracy_percent)."""
    model.eval()
    running_loss = 0.0
    n_correct = 0
    n_seen = 0
    # No gradients needed during evaluation.
    with torch.no_grad():
        for data, targets in val_loader:
            data = data.to(device)
            targets = targets.to(device)
            outputs = model(data)
            running_loss += criterion(outputs, targets).item()
            _, predicted = outputs.max(1)
            n_correct += predicted.eq(targets).sum().item()
            n_seen += targets.size(0)
    return running_loss / len(val_loader), 100. * n_correct / n_seen
91
+ # Save function
92
# Save function
def save(model, optimizer, epoch, accuracy, class_names, save_path="saved_models/best_model.pth"):
    """Checkpoint model and optimizer state together with training metadata."""
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    checkpoint = {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'accuracy': accuracy,
        'class_names': class_names,
    }
    torch.save(checkpoint, save_path)
102
+ # Main method
103
# Main method
def main():
    """Train EfficientNet-B0 on the letter dataset with checkpoint resume,
    per-batch OneCycleLR scheduling, and accuracy-based early stopping."""
    # ---- Configuration --------------------------------------------------
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    num_classes = 26
    batch_size = 32
    learning_rate = 5e-4
    num_epochs = 25
    patience = 10
    train_csv = "dataset/train.csv"
    val_csv = "dataset/test.csv"  # NOTE(review): the test split doubles as val
    save_path = "saved_models/best_model.pth"

    print("Device being used:", device)

    # ---- Transforms (augmentation only on the training split) -----------
    train_transform = transforms.Compose([
        transforms.Grayscale(num_output_channels=1),
        transforms.Resize((256, 256)),
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomRotation(degrees=45),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5], std=[0.5]),
    ])
    val_transform = transforms.Compose([
        transforms.Grayscale(num_output_channels=1),
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5], std=[0.5]),
    ])

    # ---- Data -----------------------------------------------------------
    train_dataset = Dataset(train_csv, transform=train_transform)
    val_dataset = Dataset(val_csv, transform=val_transform)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                              num_workers=4, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False,
                            num_workers=4, pin_memory=True)
    class_names = [chr(65 + i) for i in range(26)]  # ['A', 'B', ..., 'Z']

    # ---- Model / optimizer / loss ---------------------------------------
    model = EfficientNetB0Alpha(num_classes=num_classes).to(device)
    optimizer = optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=1e-4)
    criterion = nn.CrossEntropyLoss(label_smoothing=0.1)

    # ---- Resume from checkpoint if one exists ---------------------------
    start_epoch = 0
    best_accuracy = 0.0
    if os.path.exists(save_path):
        try:
            checkpoint = torch.load(save_path, map_location=device)
            model.load_state_dict(checkpoint['model_state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
            start_epoch = checkpoint['epoch'] + 1
            best_accuracy = checkpoint['accuracy']
            print(f"Loaded checkpoint from epoch {checkpoint['epoch']} with accuracy {best_accuracy:.2f}%")
        except Exception as e:
            print(f"Error loading checkpoint: {e}. Starting from scratch.")
    else:
        print(f"No checkpoint found at {save_path}. Starting from scratch.")

    # ---- LR schedule (stepped per batch inside train()) ------------------
    scheduler = optim.lr_scheduler.OneCycleLR(
        optimizer,
        max_lr=learning_rate,
        epochs=num_epochs,
        steps_per_epoch=len(train_loader),
        pct_start=0.3,
        anneal_strategy='cos'
    )

    # ---- Training loop with early stopping -------------------------------
    early_stopping_counter = 0
    for epoch in range(start_epoch, num_epochs):
        print(f"\nEpoch [{epoch+1}/{num_epochs}]")
        train_loss, train_acc = train(model, train_loader, optimizer, criterion, scheduler, device)
        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
        val_loss, val_acc = val(model, val_loader, criterion, device)
        print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%")

        if val_acc > best_accuracy:
            # New best on the validation split -> persist and reset patience.
            best_accuracy = val_acc
            save(model, optimizer, epoch, best_accuracy, class_names, save_path)
            print(f"New best model saved with accuracy: {best_accuracy:.2f}%")
            early_stopping_counter = 0
        else:
            early_stopping_counter += 1

        if early_stopping_counter >= patience:
            print(f"Early stopping triggered. Best accuracy: {best_accuracy:.2f}%")
            break

    print(f"Training completed. Best validation accuracy: {best_accuracy:.2f}%")

if __name__ == "__main__":
    main()
dataset/test-00000-of-00001-bc8b28dacaaa708d.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:802310c181ea4e8339aca3222edeb7ea4ac56199287564031c3f2604e60e3bb8
3
+ size 743978
dataset/test.csv ADDED
The diff for this file is too large to render. See raw diff
 
dataset/train-00000-of-00001-92b9aa4d471d61ab.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41fd2c7ddf580e3f9a68b76f4bc82619409077c59391bba9eb98199c1b6d6e79
3
+ size 7405967
dataset/train.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecd6439d57f530923a243cd132ca4a2a09cb38be320b3cb92c16b814b4c0d19b
3
+ size 66182401
main.py ADDED
@@ -0,0 +1,212 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import numpy as np
4
+ import cv2
5
+ import mediapipe as mp
6
+ from prediction import predict_from_image
7
+ from PredictWord import PredictWord, clear_notepad_file
8
+
9
+ Header_path = "Assets/header"
10
+ myList = os.listdir(Header_path)
11
+ cam = cv2.VideoCapture(0)
12
+ wCam, hCam = 1280, 720
13
+
14
+
15
class HandDetector:
    """Thin wrapper around MediaPipe Hands: detection, pixel-space landmark
    extraction, and a finger-up heuristic used by the drawing UI."""

    def __init__(self, mode=False, maxHands=2, modelComplexity=1, detectionCon=0.8, trackCon=0.8):
        self.mode = mode
        self.maxHands = maxHands
        self.modelComplexity = modelComplexity
        self.detectionCon = detectionCon
        self.trackCon = trackCon
        self.mpHands = mp.solutions.hands
        self.hands = self.mpHands.Hands(self.mode, self.maxHands, self.modelComplexity,
                                        self.detectionCon, self.trackCon)
        self.mpDraw = mp.solutions.drawing_utils
        # Landmark ids of the five fingertips (thumb..pinky).
        self.tipIds = [4, 8, 12, 16, 20]
        self.lmList = []

    def findHands(self, img):
        """Run hand detection on a BGR frame and draw landmarks in place."""
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.results = self.hands.process(imgRGB)
        if self.results.multi_hand_landmarks:
            for handLms in self.results.multi_hand_landmarks:
                self.mpDraw.draw_landmarks(img, handLms, self.mpHands.HAND_CONNECTIONS)
        return img

    def findPosition(self, img, handNo=0):
        """Return [[id, x_px, y_px], ...] for hand *handNo* (empty if none seen)."""
        self.lmList = []
        if self.results.multi_hand_landmarks:
            myHand = self.results.multi_hand_landmarks[handNo]
            for id, lm in enumerate(myHand.landmark):
                h, w, c = img.shape
                # Landmarks arrive normalized [0,1]; scale to pixel coordinates.
                cx, cy = int(lm.x * w), int(lm.y * h)
                self.lmList.append([id, cx, cy])
        return self.lmList

    def fingerup(self):
        """Return a 5-element 0/1 list (thumb..pinky) of raised fingers.

        Fix: previously raised IndexError when no hand landmarks were
        available; now returns [] in that case (callers already handle
        short/empty results via len() checks).
        """
        if not self.lmList:
            return []
        fingers = []
        # Thumb: horizontal test against the joint just below the tip.
        if self.lmList[self.tipIds[0]][1] < self.lmList[self.tipIds[0] - 1][1]:
            fingers.append(1)
        else:
            fingers.append(0)
        # Other fingers: tip is "up" when its y is above the PIP joint's y.
        for id in range(1, 5):
            if self.lmList[self.tipIds[id]][2] < self.lmList[self.tipIds[id] - 2][2]:
                fingers.append(1)
            else:
                fingers.append(0)
        return fingers
62
def main():
    """Webcam event loop: track the index finger, draw on an overlay canvas,
    and route finished drawings to the letter/word predictors."""
    detector = HandDetector()
    cTime = 0

    # Header images (color / eraser selectors) for the top bar.
    overlayList = []
    drawColor = (0, 0, 255)
    for impath in myList:
        image = cv2.imread(f'{Header_path}/{impath}')
        if image is not None:
            overlayList.append(image)
    header = None
    if overlayList:
        header = cv2.resize(overlayList[0], (1280, 125)) if overlayList[0].shape != (125, 1280, 3) else overlayList[0]

    # Fix: cv2.imread returns None for missing assets and cv2.resize then
    # raises -- only resize when the sidebar image actually loaded.
    RightBar = cv2.imread('Assets/sidebar/right.png')
    if RightBar is not None:
        RightBar = cv2.resize(RightBar, (230, 595))
    LeftBar = cv2.imread('Assets/sidebar/left.png')
    if LeftBar is not None:
        LeftBar = cv2.resize(LeftBar, (226, 300))

    mode = "Drawing Mode"
    canvas = np.zeros((720, 1280, 3), np.uint8)
    submode = "Letter_Prediction"
    predicted_letter = ""
    clear_notepad_file(output_dir='output', filename='output.txt')
    xp, yp = 0, 0
    while True:
        success, img = cam.read()
        if not success:
            # Fix: a failed grab yields (False, None); skip the frame instead
            # of crashing in cv2.resize.
            continue
        img = cv2.resize(img, (wCam, hCam))
        img = cv2.flip(img, 1)
        img = detector.findHands(img)
        lmlist = detector.findPosition(img)

        # Only process drawing if hand landmarks are detected
        if len(lmlist) != 0:
            x1, y1 = lmlist[8][1:3]    # index fingertip
            x2, y2 = lmlist[12][1:3]   # middle fingertip
            fingers = []
            if lmlist:
                fingers = detector.fingerup()

            # Selection Mode: both index and middle finger up
            if fingers[1] == 1 and fingers[2] == 1:
                xp, yp = 0, 0
                if y1 < 125 and len(overlayList) >= 2:
                    if 0 < x1 < 271:
                        drawColor = (0, 0, 255)
                        header = cv2.resize(overlayList[0], (1280, 125))
                    elif 850 < x1 < 1280 and len(overlayList) > 1:
                        drawColor = (0, 0, 0)
                        header = cv2.resize(overlayList[1], (1280, 125))
                cv2.rectangle(img, (x1, y1 - 25), (x2, y2 + 25), drawColor, cv2.FILLED)

                # Rightbar actions
                if x1 > 1050:
                    if 125 < y1 < 250:
                        canvas = np.zeros((720, 1280, 3), np.uint8)  # Clear canvas
                    if 260 < y1 < 385:
                        pass
                    if 385 < y1 < 510:
                        mode = "Drawing Mode"
                    if 510 < y1 < 635:
                        mode = "Prediction Mode"

            # Drawing Mode: only index finger up
            if len(fingers) >= 3 and fingers[1] and not fingers[2] and mode == "Drawing Mode":
                if xp == 0 and yp == 0:
                    xp, yp = x1, y1
                xp, yp = x1, y1

                if drawColor == (0, 0, 0):
                    # Eraser: thick black stroke plus an on-screen cursor circle.
                    cv2.circle(img, (x1, y1), 30, drawColor, cv2.FILLED)
                    cv2.line(canvas, (xp, yp), (x1, y1), drawColor, 75)
                else:
                    cv2.line(canvas, (xp, yp), (x1, y1), drawColor, 15)
                xp, yp = x1, y1

            if mode == "Prediction Mode":
                if LeftBar is not None:
                    img[125:425, 0:226] = LeftBar
                if len(fingers) >= 3 and fingers[1] and not fingers[2]:
                    if xp == 0 and yp == 0:
                        xp, yp = x1, y1
                    xp, yp = x1, y1

                    if drawColor == (0, 0, 0):
                        cv2.circle(img, (x1, y1), 30, drawColor, cv2.FILLED)
                        cv2.line(canvas, (xp, yp), (x1, y1), drawColor, 75)
                    else:
                        cv2.line(canvas, (xp, yp), (x1, y1), drawColor, 15)
                    xp, yp = x1, y1

                # Leftbar actions
                if x1 < 300:
                    if 150 < y1 < 300:
                        submode = "Letter Prediction"
                        cv2.imwrite("Output/Letter.png", canvas)
                        predicted_letter, confidence = predict_from_image("Output/Letter.png")
                        cv2.putText(img, f'Predicted Letter: {predicted_letter}', (50, 500),
                                    cv2.FONT_HERSHEY_TRIPLEX, 1, (255, 0, 255), 2)

                    if 315 < y1 < 405:
                        submode = "Word Prediction"
                        cv2.imwrite("Output/Word.png", canvas)
                        predictor = PredictWord("Output/Word.png")
                        result = predictor.predict()
                        print("Detected word:", result)
                        PredictWord.save_and_speak_word(result, output_dir='output', filename='output.txt')
                        # Start with a fresh canvas for the next word.
                        canvas = np.zeros((720, 1280, 3), np.uint8)

        # Combine canvas and camera image using bitwise operations
        imgGray = cv2.cvtColor(canvas, cv2.COLOR_BGR2GRAY)
        _, imgInv = cv2.threshold(imgGray, 50, 255, cv2.THRESH_BINARY_INV)
        imgInv = cv2.cvtColor(imgInv, cv2.COLOR_GRAY2BGR)
        img = cv2.bitwise_and(img, imgInv)
        img = cv2.bitwise_or(img, canvas)

        # Calculate FPS (frames per second)
        pTime = time.time()
        fps = 1 / (pTime - cTime) if cTime != 0 else 0
        cTime = pTime

        # Overlay header and RightBar only if they are loaded (robustness)
        if header is not None:
            img[0:125, 0:1280] = header
        if RightBar is not None:
            img[125:720, 1050:1280] = RightBar

        cv2.putText(img, f"Mode : {mode}", (1065, 645), cv2.FONT_HERSHEY_TRIPLEX, 0.5, (255, 0, 255), 1)
        cv2.putText(img, f'FPS: {int(fps)}', (1095, 695), cv2.FONT_HERSHEY_TRIPLEX, 1, (255, 0, 255), 1)
        cv2.imshow("Canvas", canvas)
        cv2.imshow("Image", img)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

if __name__ == "__main__":
    main()
    cam.release()
    cv2.destroyAllWindows()
prediction.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ from torchvision import transforms
4
+ from PIL import Image
5
+ import timm
6
+ import os
7
+
8
class EfficientNetB0Alpha(nn.Module):
    """Grayscale EfficientNet-B0 classifier; weights come from the checkpoint,
    so the timm backbone is built with pretrained=False."""

    def __init__(self, num_classes=26):
        super().__init__()
        self.model = timm.create_model(
            'efficientnet_b0', pretrained=False, in_chans=1, num_classes=num_classes
        )

    def forward(self, x):
        return self.model(x)
16
# Load model and class names once at import time so predict_from_image() is
# cheap to call per frame.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
checkpoint_path = 'saved_models/best_model.pth'
num_classes = 26

# Preprocessing must mirror the validation transform used during training.
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize(224),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5]),
])

model = EfficientNetB0Alpha(num_classes=num_classes).to(device)
if not os.path.exists(checkpoint_path):
    raise FileNotFoundError(f"Checkpoint not found at {checkpoint_path}")
# weights_only=True keeps torch.load from unpickling arbitrary objects.
checkpoint = torch.load(checkpoint_path, map_location=device, weights_only=True)
model.load_state_dict(checkpoint['model_state_dict'])
class_names = checkpoint['class_names']
36
def predict_from_image(image_path):
    """Classify the drawing at *image_path*.

    Returns (predicted_letter, confidence) where confidence is the softmax
    probability of the winning class.
    """
    img = Image.open(image_path).convert('L')
    tensor = transform(img).unsqueeze(0).to(device)
    model.eval()
    with torch.no_grad():
        probabilities = torch.softmax(model(tensor), dim=1)
        confidence, predicted = torch.max(probabilities, 1)
    return class_names[predicted.item()], confidence.item()
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ torch>=1.9.0
2
+ torchvision>=0.10.0
3
+ timm>=0.6.0
4
+ pandas>=1.3.0
5
+ Pillow>=9.0.0
6
+ numpy>=1.21.0
7
+ opencv-python>=4.5.0
8
+ mediapipe>=0.8.9
9
+ pytesseract>=0.3.8
10
+ pyttsx3>=2.90
11
+ scikit-learn>=1.0.0
saved_models/best_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e25438c897561cd81ac5b1aefd353af47079bd22a57fbc7e80050c884d2419ef
3
+ size 48963941