import torch
import torch.nn as nn
import torchvision.models as models
from torchvision import transforms
from PIL import Image
import gradio as gr

# Output labels, one per letter A-Z. The order must match the class indices
# used when the model was trained, since predictions are looked up by index.
class_names = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ")

# Preprocessing applied to every input image (intended to mirror training):
# resize to 224x224, convert to a tensor, then normalize each of the three
# channels with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ]
)

# Load model
def load_model(weights_path: str = "isl_model.pth", num_classes: "int | None" = None) -> nn.Module:
    """Build a MobileNetV2 classifier and load its trained weights on the CPU.

    Args:
        weights_path: Path to the saved ``state_dict``. Defaults to
            ``"isl_model.pth"`` relative to the working directory.
        num_classes: Number of output classes for the final linear layer.
            Defaults to ``len(class_names)``.

    Returns:
        The model with weights loaded, switched to evaluation mode.

    Raises:
        FileNotFoundError: If ``weights_path`` does not exist.
        RuntimeError: If the checkpoint does not match the architecture
            (raised by ``load_state_dict``).
    """
    if num_classes is None:
        num_classes = len(class_names)

    # Called without arguments so the network is randomly initialised on both
    # old and new torchvision releases; the ``pretrained=`` keyword is
    # deprecated in favour of ``weights=`` and triggers a warning.
    model = models.mobilenet_v2()

    # Swap the final classifier layer for one sized to our label set.
    in_features = model.classifier[1].in_features
    model.classifier[1] = nn.Linear(in_features, num_classes)

    # NOTE(security): torch.load unpickles the file, which can execute
    # arbitrary code. Only load checkpoints from a trusted source; on
    # torch versions that support it, consider passing weights_only=True.
    state_dict = torch.load(weights_path, map_location="cpu")
    model.load_state_dict(state_dict)

    # Evaluation mode disables dropout and uses running batch-norm statistics.
    model.eval()
    return model

# Load the weights once at import time; predict() reuses this module-level
# instance for every request instead of reloading the checkpoint.
model = load_model()

# Prediction function
def predict(img: "Image.Image | None") -> str:
    """Classify a hand-sign image and return the predicted letter.

    Args:
        img: The uploaded image as a PIL image, or ``None`` when the
            interface is submitted without an image.

    Returns:
        The entry of ``class_names`` with the highest model score, or a
        short prompt asking for an image when ``img`` is ``None``.
    """
    # Gradio passes None when nothing was uploaded; answer with a hint
    # instead of failing inside the transform pipeline.
    if img is None:
        return "Please upload an image first."

    # The normalization step is configured for three channels, so force RGB
    # to cope with grayscale, palette, or RGBA inputs.
    rgb_img = img.convert("RGB")

    # Inference only: skip gradient tracking.
    with torch.no_grad():
        batch = transform(rgb_img).unsqueeze(0)  # add the batch dimension
        logits = model(batch)

    return class_names[logits.argmax(1).item()]

# Gradio UI: a single image upload (delivered to predict() as a PIL image)
# and a plain-text output holding the predicted letter.
demo = gr.Interface(
    predict,
    gr.Image(type="pil"),
    "text",
    title="ISL Alphabet Recognition",
    description="Upload a hand sign image (A–Z) to get the predicted letter.",
)

# Start the web server. There is no __main__ guard, so this also runs when
# the module is imported.
demo.launch()