Initial commit
Browse files- .gitattributes +1 -0
- PredictWord.py +49 -0
- ValidationChecker.py +93 -0
- cnn.py +196 -0
- dataset/test-00000-of-00001-bc8b28dacaaa708d.parquet +3 -0
- dataset/test.csv +0 -0
- dataset/train-00000-of-00001-92b9aa4d471d61ab.parquet +3 -0
- dataset/train.csv +3 -0
- main.py +212 -0
- prediction.py +48 -0
- requirements.txt +11 -0
- saved_models/best_model.pth +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
dataset/train.csv filter=lfs diff=lfs merge=lfs -text
|
PredictWord.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import cv2
|
| 2 |
+
import pytesseract
|
| 3 |
+
import pyttsx3
|
| 4 |
+
import os
|
| 5 |
+
import subprocess
|
| 6 |
+
import time
|
| 7 |
+
|
| 8 |
+
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
|
| 9 |
+
|
| 10 |
+
class PredictWord:
    """OCR a drawn/handwritten word from an image file and speak it aloud."""

    def __init__(self, image_path):
        # Path of the canvas snapshot to run OCR on.
        self.image_path = image_path

    def predict(self):
        """Run Tesseract OCR on the image and return the recognized text.

        Returns:
            The stripped OCR string, or ``None`` when the image cannot be
            read from disk.
        """
        image = cv2.imread(self.image_path)
        if image is None:
            print(f"Error: Image not found at '{self.image_path}'")
            return None

        # Tesseract performs best on single-channel input.
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # --oem 3: default OCR engine; --psm 6: assume one uniform block of text.
        custom_config = r'--oem 3 --psm 6'
        word = pytesseract.image_to_string(gray, config=custom_config)
        return word.strip()

    @staticmethod
    def save_and_speak_word(word, output_dir='output', filename='output.txt'):
        """Write *word* to a text file, open it in Notepad, and speak it.

        ``word`` may be ``None`` (e.g. when ``predict()`` failed); it is
        treated as an empty string instead of raising TypeError.
        """
        if word is None:
            word = ''

        # Ensure output directory exists
        os.makedirs(output_dir, exist_ok=True)
        file_path = os.path.abspath(os.path.join(output_dir, filename))

        # Write word to file; the blank line keeps successive words separated.
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(word + '\n\n')

        # Open file in Notepad (Windows-only, consistent with the
        # hard-coded Tesseract path at module level).
        subprocess.Popen(['notepad.exe', file_path])
        time.sleep(1)  # Give Notepad time to open

        # Speak the word
        engine = pyttsx3.init()
        engine.say(word)
        engine.runAndWait()
|
| 45 |
+
|
| 46 |
+
def clear_notepad_file(output_dir='output', filename='output.txt'):
    """Truncate the shared output file so a new session starts empty.

    Creates *output_dir* first if it does not exist: the original code
    raised FileNotFoundError on a fresh checkout, where main.py calls
    this before anything has created the directory.
    """
    os.makedirs(output_dir, exist_ok=True)
    file_path = os.path.abspath(os.path.join(output_dir, filename))
    with open(file_path, 'w', encoding='utf-8') as f:
        f.write('')
|
ValidationChecker.py
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
from torch.utils.data import DataLoader, Dataset
|
| 4 |
+
from torchvision import transforms
|
| 5 |
+
from PIL import Image
|
| 6 |
+
import pandas as pd
|
| 7 |
+
import io
|
| 8 |
+
import ast
|
| 9 |
+
import os
|
| 10 |
+
from sklearn.metrics import confusion_matrix, classification_report
|
| 11 |
+
import timm
|
| 12 |
+
|
| 13 |
+
class EfficientNetB0Alpha(nn.Module):
    """EfficientNet-B0 classifier (via timm) adapted to 1-channel input.

    Must stay structurally identical to the training-time definition in
    cnn.py: saved state-dict keys are prefixed with ``model.``, so the
    ``self.model`` attribute name cannot change without breaking
    checkpoint loading.
    """

    def __init__(self, num_classes=26):
        super().__init__()
        # in_chans=1: grayscale letter images; timm adapts the pretrained
        # stem weights to a single input channel.
        self.model = timm.create_model('efficientnet_b0', pretrained=True, in_chans=1, num_classes=num_classes)

    def forward(self, x):
        return self.model(x)
|
| 19 |
+
|
| 20 |
+
class Dataset(Dataset):
    """CSV-backed image dataset for evaluation.

    Each row stores the image as a dict (or its string repr) with a
    ``'bytes'`` entry holding the encoded image, plus an integer label.

    NOTE: the class name shadows ``torch.utils.data.Dataset``; it is
    kept for compatibility with existing callers in this file.
    """

    def __init__(self, csv_path, transform=None, image_col='image', label_col='label'):
        self.data = pd.read_csv(csv_path)
        self.transform = transform
        self.image_col = image_col
        self.label_col = label_col

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_data = self.data.iloc[idx][self.image_col]
        label = self.data.iloc[idx][self.label_col]
        if isinstance(img_data, str):
            # CSV serialized the dict as its repr; parse it back.
            # Same defensive handling as the training-time Dataset in
            # cnn.py, so bad rows fail with an index-specific error
            # instead of an opaque traceback.
            try:
                img_dict = ast.literal_eval(img_data)
                img_bytes = img_dict['bytes']
            except (ValueError, SyntaxError, KeyError) as e:
                raise ValueError(f"Error parsing image data at index {idx}: {e}")
        else:
            img_bytes = img_data['bytes']
        try:
            img = Image.open(io.BytesIO(img_bytes)).convert('L')
        except Exception as e:
            raise ValueError(f"Error decoding image at index {idx}: {e}")
        if self.transform:
            img = self.transform(img)
        return img, label
|
| 40 |
+
|
| 41 |
+
def load_model(model_path, num_classes, device):
    """Restore the best checkpoint into a fresh model in eval mode.

    Raises:
        FileNotFoundError: when no checkpoint exists at *model_path*.
    """
    if not os.path.exists(model_path):
        raise FileNotFoundError(f"Model file not found at {model_path}")

    net = EfficientNetB0Alpha(num_classes=num_classes)
    # weights_only=True restricts unpickling to tensors/containers.
    state = torch.load(model_path, map_location=device, weights_only=True)
    net.load_state_dict(state['model_state_dict'])
    net.to(device)
    net.eval()
    return net
|
| 50 |
+
|
| 51 |
+
def evaluate(model, test_loader, device, class_names):
    """Score *model* on *test_loader*.

    Prints overall accuracy, a per-class classification report, and the
    confusion matrix; returns ``(accuracy_percent, confusion_matrix)``.
    """
    model.eval()
    predictions, truths = [], []
    hits, seen = 0, 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            scores = model(images)
            _, batch_pred = torch.max(scores.data, 1)
            seen += labels.size(0)
            hits += (batch_pred == labels).sum().item()
            predictions.extend(batch_pred.cpu().numpy())
            truths.extend(labels.cpu().numpy())

    # Guard against an empty loader (division by zero).
    accuracy = 100 * hits / max(seen, 1)
    print(f"Test Accuracy: {accuracy:.2f}%")
    print("\nClassification Report:")
    print(classification_report(truths, predictions, target_names=class_names, digits=2))
    cm = confusion_matrix(truths, predictions)
    print("\nConfusion Matrix (True Labels: rows, Predicted Labels: columns):")
    print(pd.DataFrame(cm, index=class_names, columns=class_names))
    return accuracy, cm
|
| 72 |
+
|
| 73 |
+
def main():
    """Evaluate the saved best checkpoint on the held-out test CSV."""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    num_classes = 26
    batch_size = 32
    model_path = "saved_models/best_model.pth"
    test_csv = "dataset/test.csv"
    print("Device being used:", device)

    # Same preprocessing as validation at training time (no augmentation).
    test_transform = transforms.Compose([
        transforms.Grayscale(num_output_channels=1),
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5], std=[0.5]),
    ])

    test_loader = DataLoader(
        Dataset(test_csv, transform=test_transform),
        batch_size=batch_size,
        shuffle=False,
        num_workers=4,
        pin_memory=True,
    )
    class_names = [chr(65 + i) for i in range(26)]  # 'A' .. 'Z'

    model = load_model(model_path, num_classes, device)
    evaluate(model, test_loader, device, class_names)

if __name__ == "__main__":
    main()
|
cnn.py
ADDED
|
@@ -0,0 +1,196 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
import torch.optim as optim
|
| 4 |
+
from torch.utils.data import DataLoader, Dataset
|
| 5 |
+
from torchvision import transforms
|
| 6 |
+
from PIL import Image
|
| 7 |
+
import pandas as pd
|
| 8 |
+
import io
|
| 9 |
+
import ast
|
| 10 |
+
import timm
|
| 11 |
+
import os
|
| 12 |
+
|
| 13 |
+
# Model
|
| 14 |
+
class EfficientNetB0Alpha(nn.Module):
    """EfficientNet-B0 classifier (via timm) adapted to 1-channel input.

    The ``self.model`` attribute name determines the ``model.`` prefix
    of every saved state-dict key; ValidationChecker.py and
    prediction.py rely on that layout when loading the checkpoint, so
    it must not be renamed.
    """

    def __init__(self, num_classes=26):
        super().__init__()
        # in_chans=1: grayscale letter images; num_classes=26: A-Z.
        self.model = timm.create_model('efficientnet_b0', pretrained=True, in_chans=1, num_classes=num_classes)

    def forward(self, x):
        return self.model(x)
|
| 21 |
+
|
| 22 |
+
# Dataset
|
| 23 |
+
class Dataset(Dataset):
    """Image-classification dataset backed by a CSV export.

    The image column holds a dict (or its string repr) whose ``'bytes'``
    entry is the encoded image; the label column holds the class index.

    NOTE: the class name shadows ``torch.utils.data.Dataset``; kept for
    compatibility with existing callers in this file.
    """

    def __init__(self, csv_path, transform=None, image_col='image', label_col='label'):
        self.data = pd.read_csv(csv_path)
        self.transform = transform
        self.image_col = image_col
        self.label_col = label_col

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]
        raw = row[self.image_col]
        label = row[self.label_col]
        if isinstance(raw, str):
            # The CSV serialized the dict as text; parse it back.
            try:
                img_bytes = ast.literal_eval(raw)['bytes']
            except (ValueError, SyntaxError, KeyError) as e:
                raise ValueError(f"Error parsing image data at index {idx}: {e}")
        else:
            img_bytes = raw['bytes']
        try:
            img = Image.open(io.BytesIO(img_bytes)).convert('L')
        except Exception as e:
            raise ValueError(f"Error decoding image at index {idx}: {e}")
        if self.transform:
            img = self.transform(img)
        return img, label
|
| 51 |
+
|
| 52 |
+
# Training function
|
| 53 |
+
def train(model, train_loader, optimizer, criterion, scheduler, device):
    """Run one training epoch.

    Returns:
        (mean_batch_loss, accuracy_percent) over the whole loader.
    """
    model.train()
    running_loss = 0.0
    hits, seen = 0, 0
    for batch, labels in train_loader:
        batch, labels = batch.to(device), labels.to(device)

        optimizer.zero_grad()
        logits = model(batch)
        loss = criterion(logits, labels)
        loss.backward()
        # Clip gradients to stabilize training.
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        if scheduler is not None:
            # Per-batch scheduler step (matches a OneCycle-style schedule).
            scheduler.step()

        running_loss += loss.item()
        _, predicted = logits.max(1)
        hits += predicted.eq(labels).sum().item()
        seen += labels.size(0)

    return running_loss / len(train_loader), 100. * hits / seen
|
| 73 |
+
|
| 74 |
+
# Validation function
|
| 75 |
+
def val(model, val_loader, criterion, device):
    """Evaluate without gradient tracking.

    Returns:
        (mean_batch_loss, accuracy_percent) over the whole loader.
    """
    model.eval()
    running_loss = 0.0
    hits, seen = 0, 0
    with torch.no_grad():
        for batch, labels in val_loader:
            batch, labels = batch.to(device), labels.to(device)
            logits = model(batch)
            running_loss += criterion(logits, labels).item()
            _, predicted = logits.max(1)
            hits += predicted.eq(labels).sum().item()
            seen += labels.size(0)
    return running_loss / len(val_loader), 100. * hits / seen
|
| 90 |
+
|
| 91 |
+
# Save function
|
| 92 |
+
def save(model, optimizer, epoch, accuracy, class_names, save_path="saved_models/best_model.pth"):
    """Checkpoint model and optimizer state with training metadata.

    Saved keys: epoch, model_state_dict, optimizer_state_dict, accuracy,
    class_names — the layout expected by ValidationChecker.py and
    prediction.py.
    """
    # Create the target directory on first save.
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    payload = {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'accuracy': accuracy,
        'class_names': class_names,
    }
    torch.save(payload, save_path)
|
| 101 |
+
|
| 102 |
+
# Main method
|
| 103 |
+
def main():
    """Train EfficientNet-B0 on the letter CSVs with checkpoint resume
    and early stopping on validation accuracy."""
    # Config
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    num_classes = 26
    batch_size = 32
    learning_rate = 5e-4
    num_epochs = 25
    patience = 10  # epochs without val improvement before stopping
    train_csv = "dataset/train.csv"
    # NOTE(review): the test split doubles as the validation set — there
    # is no separate holdout, so "best val accuracy" is measured on test.
    val_csv = "dataset/test.csv"
    save_path = "saved_models/best_model.pth"

    print("Device being used:", device)

    # Transforms
    train_transform = transforms.Compose([
        transforms.Grayscale(num_output_channels=1),
        transforms.Resize((256, 256)),
        transforms.RandomCrop(224),
        # NOTE(review): horizontal flips can turn some letters into other
        # letters (or non-letters) — confirm this augmentation is intended
        # for character data.
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomRotation(degrees=45),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5], std=[0.5])
    ])
    val_transform = transforms.Compose([
        transforms.Grayscale(num_output_channels=1),
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5], std=[0.5])
    ])

    # Datasets and loaders
    train_dataset = Dataset(train_csv, transform=train_transform)
    val_dataset = Dataset(val_csv, transform=val_transform)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True)
    class_names = [chr(65 + i) for i in range(26)]  # ['A', 'B', ..., 'Z']

    # Model, optimizer, criterion
    model = EfficientNetB0Alpha(num_classes=num_classes).to(device)
    optimizer = optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=1e-4)
    criterion = nn.CrossEntropyLoss(label_smoothing=0.1)

    # Load checkpoint if it exists
    start_epoch = 0
    best_accuracy = 0.0
    if os.path.exists(save_path):
        try:
            checkpoint = torch.load(save_path, map_location=device)
            model.load_state_dict(checkpoint['model_state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
            start_epoch = checkpoint['epoch'] + 1
            best_accuracy = checkpoint['accuracy']
            print(f"Loaded checkpoint from epoch {checkpoint['epoch']} with accuracy {best_accuracy:.2f}%")
        except Exception as e:
            # Corrupt or incompatible checkpoints fall back to fresh training.
            print(f"Error loading checkpoint: {e}. Starting from scratch.")
    else:
        print(f"No checkpoint found at {save_path}. Starting from scratch.")

    # Scheduler
    # NOTE(review): OneCycleLR is rebuilt from step 0 even when resuming
    # from a checkpoint, so the LR schedule restarts on resume — confirm
    # this is acceptable.
    scheduler = optim.lr_scheduler.OneCycleLR(
        optimizer,
        max_lr=learning_rate,
        epochs=num_epochs,
        steps_per_epoch=len(train_loader),
        pct_start=0.3,
        anneal_strategy='cos'
    )

    # Training loop
    early_stopping_counter = 0
    for epoch in range(start_epoch, num_epochs):
        print(f"\nEpoch [{epoch+1}/{num_epochs}]")
        train_loss, train_acc = train(model, train_loader, optimizer, criterion, scheduler, device)
        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
        val_loss, val_acc = val(model, val_loader, criterion, device)
        print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%")

        # Keep only the best-performing checkpoint.
        if val_acc > best_accuracy:
            best_accuracy = val_acc
            save(model, optimizer, epoch, best_accuracy, class_names, save_path)
            print(f"New best model saved with accuracy: {best_accuracy:.2f}%")
            early_stopping_counter = 0
        else:
            early_stopping_counter += 1

        if early_stopping_counter >= patience:
            print(f"Early stopping triggered. Best accuracy: {best_accuracy:.2f}%")
            break

    print(f"Training completed. Best validation accuracy: {best_accuracy:.2f}%")

if __name__ == "__main__":
    main()
|
dataset/test-00000-of-00001-bc8b28dacaaa708d.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:802310c181ea4e8339aca3222edeb7ea4ac56199287564031c3f2604e60e3bb8
|
| 3 |
+
size 743978
|
dataset/test.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
dataset/train-00000-of-00001-92b9aa4d471d61ab.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:41fd2c7ddf580e3f9a68b76f4bc82619409077c59391bba9eb98199c1b6d6e79
|
| 3 |
+
size 7405967
|
dataset/train.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ecd6439d57f530923a243cd132ca4a2a09cb38be320b3cb92c16b814b4c0d19b
|
| 3 |
+
size 66182401
|
main.py
ADDED
|
@@ -0,0 +1,212 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import time
|
| 3 |
+
import numpy as np
|
| 4 |
+
import cv2
|
| 5 |
+
import mediapipe as mp
|
| 6 |
+
from prediction import predict_from_image
|
| 7 |
+
from PredictWord import PredictWord, clear_notepad_file
|
| 8 |
+
|
| 9 |
+
# Directory of header/toolbar images drawn across the top of the frame.
Header_path = "Assets/header"
# NOTE(review): executed at import time — os.listdir raises if the
# Assets/header directory is missing.
myList = os.listdir(Header_path)
# Webcam handle opened at import time as a module-level global;
# released in the __main__ block at the bottom of the file.
cam = cv2.VideoCapture(0)
# Capture resolution (width, height) each frame is resized to.
wCam, hCam = 1280, 720
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class HandDetector:
    """Thin wrapper around MediaPipe Hands: landmark detection, pixel
    positions, and a simple fingers-up heuristic."""

    def __init__(self, mode=False, maxHands=2, modelComplexity=1, detectionCon=0.8, trackCon=0.8):
        self.mode = mode
        self.maxHands = maxHands
        self.modelComplexity = modelComplexity
        self.detectionCon = detectionCon
        self.trackCon = trackCon
        self.mpHands = mp.solutions.hands
        self.hands = self.mpHands.Hands(self.mode, self.maxHands, self.modelComplexity, self.detectionCon,
                                        self.trackCon)
        self.mpDraw = mp.solutions.drawing_utils
        # Landmark indices of the five fingertips (thumb .. pinky).
        self.tipIds = [4, 8, 12, 16, 20]
        self.lmList = []

    def findHands(self, img):
        """Detect hands in a BGR frame, drawing landmarks in place."""
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.results = self.hands.process(imgRGB)
        if self.results.multi_hand_landmarks:
            for handLms in self.results.multi_hand_landmarks:
                self.mpDraw.draw_landmarks(img, handLms, self.mpHands.HAND_CONNECTIONS)
        return img

    def findPosition(self, img, handNo=0):
        """Return ``[[id, x_px, y_px], ...]`` for hand *handNo*, or an
        empty list when no hand was found. Must be called after
        ``findHands()`` (which sets ``self.results``)."""
        self.lmList = []
        if self.results.multi_hand_landmarks:
            myHand = self.results.multi_hand_landmarks[handNo]
            for id, lm in enumerate(myHand.landmark):
                h, w, c = img.shape
                # Landmark coordinates are normalized; convert to pixels.
                cx, cy = int(lm.x * w), int(lm.y * h)
                self.lmList.append([id, cx, cy])
        return self.lmList

    def fingerup(self):
        """Return one 0/1 flag per finger (thumb .. pinky) based on the
        landmarks stored by the last ``findPosition()`` call.

        Returns an empty list when fewer than 21 landmarks are stored —
        the original code raised IndexError in that case.
        """
        if len(self.lmList) < 21:
            return []
        fingers = []
        # Thumb: compare tip vs. adjacent joint x-coordinates
        # (the frame is mirrored by the caller via cv2.flip).
        if self.lmList[self.tipIds[0]][1] < self.lmList[self.tipIds[0] - 1][1]:
            fingers.append(1)
        else:
            fingers.append(0)

        # Other fingers: tip above (smaller y than) the joint two below it.
        for id in range(1, 5):
            if self.lmList[self.tipIds[id]][2] < self.lmList[self.tipIds[id] - 2][2]:
                fingers.append(1)
            else:
                fingers.append(0)
        return fingers
|
| 61 |
+
|
| 62 |
+
def main():
    """Air-drawing loop: track the index finger with MediaPipe, draw on a
    canvas, and hand the canvas to the letter/word predictors."""
    detector = HandDetector()
    cTime = 0  # previous-frame timestamp for the FPS estimate

    overlayList = []
    drawColor = (0, 0, 255)  # BGR: red pen by default
    # Load every header image that can actually be read.
    for impath in myList:
        image = cv2.imread(f'{Header_path}/{impath}')
        if image is not None:
            overlayList.append(image)
    header = None
    if overlayList:
        # Normalize the header strip to the 1280x125 banner area.
        header = cv2.resize(overlayList[0], (1280, 125)) if overlayList[0].shape != (125, 1280, 3) else overlayList[0]

    # NOTE(review): cv2.resize raises if these sidebar images are missing;
    # the `is not None` checks further down never get a chance to run.
    RightBar = cv2.imread('Assets/sidebar/right.png')
    RightBar = cv2.resize(RightBar, (230, 595))
    LeftBar = cv2.imread('Assets/sidebar/left.png')
    LeftBar = cv2.resize(LeftBar, (226, 300))

    mode = "Drawing Mode"
    canvas = np.zeros((720, 1280, 3), np.uint8)  # persistent drawing surface
    submode = "Letter_Prediction"  # NOTE(review): written but never read
    predicted_letter = ""
    # Start each session with an empty shared output file.
    clear_notepad_file(output_dir='output', filename='output.txt')
    xp, yp = 0, 0  # previous pen position; (0, 0) doubles as "pen lifted"
    while True:
        # NOTE(review): `success` is never checked — cv2.resize raises if
        # the camera read fails (e.g. the device is unplugged).
        success, img = cam.read()
        img = cv2.resize(img, (wCam, hCam))
        img = cv2.flip(img, 1)  # mirror so drawing feels natural
        img = detector.findHands(img)
        lmlist = detector.findPosition(img)

        # Only process drawing if hand landmarks are detected
        if len(lmlist) != 0:
            x1, y1 = lmlist[8][1:3]   # index fingertip
            x2, y2 = lmlist[12][1:3]  # middle fingertip
            fingers = []
            if lmlist:
                fingers = detector.fingerup()

            # Selection Mode: both index and middle finger up
            # (assumes fingerup() returned all five flags)
            if fingers[1] == 1 and fingers[2] == 1:
                xp, yp = 0, 0  # lift the pen so no stroke bridges the gap
                if y1 < 125 and len(overlayList) >= 2:
                    # Header hit-test: left region = red pen, right = eraser.
                    if 0 < x1 < 271:
                        drawColor = (0, 0, 255)
                        header = cv2.resize(overlayList[0], (1280, 125))
                    elif 850 < x1 < 1280 and len(overlayList) > 1:
                        drawColor = (0, 0, 0)
                        header = cv2.resize(overlayList[1], (1280, 125))
                cv2.rectangle(img, (x1, y1 - 25), (x2, y2 + 25), drawColor, cv2.FILLED)

                # Rightbar actions (vertical button strip, x > 1050)
                if x1 > 1050:
                    if 125 < y1 < 250:
                        canvas = np.zeros((720, 1280, 3), np.uint8)  # Clear canvas
                    if 260 < y1 < 385:
                        pass  # unassigned button slot
                    if 385 < y1 < 510:
                        mode = "Drawing Mode"
                    if 510 < y1 < 635:
                        mode = "Prediction Mode"

            # Drawing Mode: only index finger up
            if len(fingers) >= 3 and fingers[1] and not fingers[2] and mode == "Drawing Mode":
                if xp == 0 and yp == 0:
                    # First frame of a stroke: start from the current point.
                    xp, yp = x1, y1
                xp, yp = x1, y1
                # NOTE(review): the assignment above makes every line a
                # zero-length segment from (x1, y1) to itself — likely a bug,
                # strokes only render as dots per frame.

                if drawColor == (0, 0, 0):
                    # Eraser: thicker stroke plus a visible cursor circle.
                    cv2.circle(img, (x1, y1), 30, drawColor, cv2.FILLED)
                    cv2.line(canvas, (xp, yp), (x1, y1), drawColor, 75)
                else:
                    cv2.line(canvas, (xp, yp), (x1, y1), drawColor, 15)
                xp, yp = x1, y1


            if mode == "Prediction Mode":
                if LeftBar is not None:
                    img[125:425, 0:226] = LeftBar  # prediction button strip
                # Same draw-with-index-finger behavior as Drawing Mode.
                if len(fingers) >= 3 and fingers[1] and not fingers[2]:
                    if xp == 0 and yp == 0:
                        xp, yp = x1, y1
                    xp, yp = x1, y1

                    if drawColor == (0, 0, 0):
                        cv2.circle(img, (x1, y1), 30, drawColor, cv2.FILLED)
                        cv2.line(canvas, (xp, yp), (x1, y1), drawColor, 75)
                    else:
                        cv2.line(canvas, (xp, yp), (x1, y1), drawColor, 15)
                    xp, yp = x1, y1

                # Leftbar actions
                if x1 < 300:
                    if 150 < y1 < 300:
                        # Single-letter CNN prediction from the canvas snapshot.
                        # NOTE(review): writes to "Output/" (capital O) but the
                        # text file uses "output/" — case-sensitive filesystems
                        # will treat these as different directories.
                        submode = "Letter Prediction"
                        cv2.imwrite("Output/Letter.png", canvas)
                        predicted_letter, confidence = predict_from_image("Output/Letter.png")
                        cv2.putText(img, f'Predicted Letter: {predicted_letter}', (50, 500), cv2.FONT_HERSHEY_TRIPLEX,
                                    1, (255, 0, 255), 2)
                        # NOTE(review): prediction_time/reset_canvas are set but
                        # only used by the commented-out block below.
                        prediction_time = time.time()
                        reset_canvas = True

                    if 315 < y1 < 405:
                        # Whole-word OCR prediction, then speak and persist it.
                        submode = "Word Prediction"
                        cv2.imwrite("Output/Word.png", canvas)
                        predictor = PredictWord("Output/Word.png")
                        result = predictor.predict()
                        print("Detected word:", result)
                        PredictWord.save_and_speak_word(result, output_dir='output', filename='output.txt')
                        canvas = np.zeros((720, 1280, 3), np.uint8)
        #
        # # Place this outside the x1 < 300 block, so it runs every frame
        # if reset_canvas and prediction_time is not None:
        #     if time.time() - prediction_time > 5:
        #         canvas = np.zeros((720, 1280, 3), np.uint8)
        #         reset_canvas = False
        #         prediction_time = None

        # Combine canvas and camera image using bitwise operations:
        # AND with the inverted mask darkens drawn pixels, OR paints them.
        imgGray = cv2.cvtColor(canvas, cv2.COLOR_BGR2GRAY)
        _, imgInv = cv2.threshold(imgGray, 50, 255, cv2.THRESH_BINARY_INV)
        imgInv = cv2.cvtColor(imgInv, cv2.COLOR_GRAY2BGR)
        img = cv2.bitwise_and(img, imgInv)
        img = cv2.bitwise_or(img, canvas)

        # Calculate FPS (frames per second)
        pTime = time.time()
        fps = 1 / (pTime - cTime) if cTime != 0 else 0
        cTime = pTime

        # Overlay header and RightBar only if they are loaded (robustness)
        if header is not None:
            img[0:125, 0:1280] = header
        if RightBar is not None:
            img[125:720, 1050:1280] = RightBar




        cv2.putText(img, f"Mode : {mode}", (1065, 645), cv2.FONT_HERSHEY_TRIPLEX, 0.5, (255, 0, 255), 1)
        cv2.putText(img, f'FPS: {int(fps)}', (1095, 695), cv2.FONT_HERSHEY_TRIPLEX, 1, (255, 0, 255), 1)
        cv2.imshow("Canvas", canvas)
        cv2.imshow("Image", img)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

if __name__ == "__main__":
    main()
    # Release the module-level camera handle opened at import time.
    cam.release()
    cv2.destroyAllWindows()
|
prediction.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
from torchvision import transforms
|
| 4 |
+
from PIL import Image
|
| 5 |
+
import timm
|
| 6 |
+
import os
|
| 7 |
+
|
| 8 |
+
class EfficientNetB0Alpha(nn.Module):
    """Inference-time copy of the training architecture (cnn.py).

    pretrained=False because weights are restored from the local
    checkpoint below; the ``self.model`` attribute name must match the
    training definition so the ``model.``-prefixed state-dict keys load.
    """

    def __init__(self, num_classes=26):
        super().__init__()
        # in_chans=1: grayscale input, matching the training transforms.
        self.model = timm.create_model('efficientnet_b0', pretrained=False, in_chans=1, num_classes=num_classes)

    def forward(self, x):
        return self.model(x)
|
| 15 |
+
|
| 16 |
+
# Load model and class names once
|
| 17 |
+
# Load model and class names once
# NOTE(review): everything below runs at import time; importing this
# module without the checkpoint on disk raises FileNotFoundError, and the
# model load cost is paid on first import.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
checkpoint_path = 'saved_models/best_model.pth'
num_classes = 26

# Preprocessing for inference — Resize + CenterCrop to the 224x224 input
# the network was trained on, with the same normalization.
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize(224),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])

model = EfficientNetB0Alpha(num_classes=num_classes).to(device)
if not os.path.exists(checkpoint_path):
    raise FileNotFoundError(f"Checkpoint not found at {checkpoint_path}")
# weights_only=True restricts unpickling to tensors/basic containers.
checkpoint = torch.load(checkpoint_path, map_location=device, weights_only=True)
model.load_state_dict(checkpoint['model_state_dict'])
# Class labels saved alongside the weights at training time (cnn.py save()).
class_names = checkpoint['class_names']
|
| 35 |
+
|
| 36 |
+
def predict_from_image(image_path):
    """Classify the letter drawn in *image_path*.

    Returns:
        (predicted_class, confidence) — the winning class label and its
        softmax probability.
    """
    tensor = transform(Image.open(image_path).convert('L'))
    tensor = tensor.unsqueeze(0).to(device)  # add batch dimension

    model.eval()
    with torch.no_grad():
        logits = model(tensor)
        probabilities = torch.softmax(logits, dim=1)
        confidence, predicted = torch.max(probabilities, 1)
        predicted_class = class_names[predicted.item()]
        confidence = confidence.item()
    return predicted_class, confidence
|
requirements.txt
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
torch>=1.9.0
|
| 2 |
+
torchvision>=0.10.0
|
| 3 |
+
timm>=0.6.0
|
| 4 |
+
pandas>=1.3.0
|
| 5 |
+
Pillow>=9.0.0
|
| 6 |
+
numpy>=1.21.0
|
| 7 |
+
opencv-python>=4.5.0
|
| 8 |
+
mediapipe>=0.8.9
|
| 9 |
+
pytesseract>=0.3.8
|
| 10 |
+
pyttsx3>=2.90
|
| 11 |
+
scikit-learn>=1.0.0
|
saved_models/best_model.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e25438c897561cd81ac5b1aefd353af47079bd22a57fbc7e80050c884d2419ef
|
| 3 |
+
size 48963941
|