import gradio as gr
import torch
import torch.nn as nn
import torchvision.models as models
from PIL import Image
from torchvision import transforms

# Classes must match your training dataset: the position of each label in
# this list is the class index that the network's output is mapped back to.
class_names = [
    "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M",
    "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
]

# Transform (same as training): resize to 224x224, convert to a tensor and
# normalise each of the three channels with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.5] * 3, [0.5] * 3),
])


def load_model():
    """Build a MobileNetV2 classifier and load the weights from disk.

    The final classifier layer is replaced so that it emits one score per
    entry in ``class_names``; the weights are then read from
    ``isl_model.pth`` onto the CPU and the model is put in eval mode.

    Returns:
        The ready-to-use model in evaluation mode.
    """
    # Called without arguments so no pretrained weights are downloaded. This
    # is equivalent to the former ``pretrained=False`` but avoids the
    # deprecation warning that argument triggers on torchvision >= 0.13.
    model = models.mobilenet_v2()
    model.classifier[1] = nn.Linear(
        model.classifier[1].in_features, len(class_names)
    )
    # NOTE(review): torch.load unpickles the file, which can execute
    # arbitrary code. Only ship a checkpoint you trust; on torch >= 1.13
    # consider passing ``weights_only=True``.
    state_dict = torch.load("isl_model.pth", map_location="cpu")
    model.load_state_dict(state_dict)
    model.eval()
    return model


model = load_model()


def predict(img: Image.Image) -> str:
    """Return the predicted letter for a hand-sign image.

    Args:
        img: The image to classify. Gradio passes ``None`` when the user
            submits without providing an image.

    Returns:
        The entry of ``class_names`` with the highest model score.

    Raises:
        gr.Error: If no image was provided.
    """
    if img is None:
        # Surface a readable message in the UI instead of an opaque
        # TypeError from the transform pipeline.
        raise gr.Error("Please upload an image first.")

    # The normalisation and the network both expect exactly three channels;
    # converting makes grayscale / RGBA / palette images safe for callers
    # that bypass Gradio's own preprocessing.
    rgb = img.convert("RGB")

    with torch.no_grad():
        x = transform(rgb).unsqueeze(0)  # add the batch dimension
        out = model(x)
    return class_names[out.argmax(1).item()]


# Gradio interface
demo = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="pil"),
    outputs="text",
    title="ISL Alphabet Recognition",
    description="Upload a hand sign image (A–Z) to get the predicted letter.",
)

# Only start the web server when run as a script, so importing this module
# (e.g. to reuse ``predict``) does not block on a running server.
if __name__ == "__main__":
    demo.launch()