|
|
import torch
|
|
|
import torch.nn as nn
|
|
|
from torchvision import transforms, models
|
|
|
from PIL import Image
|
|
|
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
# Romaji labels for the 46 basic hiragana syllables. Order matters: index i
# corresponds to the model's output logit i, so it must match training.
classes = (
    "aa chi ee fu ha he hi ho ii "
    "ka ke ki ko ku ma me mi mo mu "
    "na ne ni nn no nu oo "
    "ra re ri ro ru sa se shi so su "
    "ta te to tsu uu wa wo ya yo yu"
).split()
|
|
|
|
|
|
|
|
|
# Inference-time preprocessing pipeline.
# BUG FIX: the original pipeline included RandomRotation(10) and ColorJitter()
# — training-style augmentations that make predictions non-deterministic at
# inference time (ColorJitter's defaults happen to be no-ops, but
# RandomRotation(10) is not). They are removed here.
# NOTE(review): the 0.5 mean/std normalization presumably matches the training
# script — confirm against it.
transform = transforms.Compose([
    transforms.Lambda(lambda x: x.convert('RGB')),  # accept grayscale/RGBA uploads
    transforms.Resize((224, 224)),                  # standard ResNet input size
    transforms.ToTensor(),                          # PIL -> float tensor in [0, 1]
    transforms.Normalize(mean=[0.5, 0.5, 0.5],
                         std=[0.5, 0.5, 0.5])       # map to [-1, 1]
])
|
|
|
|
|
|
|
|
|
# Use the GPU when one is available, otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# ResNet-50 backbone with a custom classification head. weights=None: no
# pretrained download — all weights come from the checkpoint loaded below,
# so the head's architecture must match the one used in training.
model = models.resnet50(weights=None)
in_features = model.fc.in_features
model.fc = nn.Sequential(
    nn.Linear(in_features, 512),
    nn.ReLU(),
    nn.Dropout(0.4),  # inactive in eval mode; kept so the checkpoint keys line up
    nn.Linear(512, len(classes))
)

# weights_only=True restricts torch.load to tensor/primitive data, preventing
# arbitrary code execution from a tampered pickle checkpoint. (Available since
# torch 1.13 — same era as the `weights=None` torchvision API used above.)
model.load_state_dict(
    torch.load("best_model.pth", map_location=device, weights_only=True)
)
model.to(device)
model.eval()  # disable dropout / use running BN stats for inference
|
|
|
|
|
|
|
|
|
def predict(image):
    """Classify a handwritten hiragana image.

    Args:
        image: PIL image from the Gradio input, or None when the user
            submits without uploading anything.

    Returns:
        A human-readable string naming the predicted syllable, or a
        prompt to upload an image when none was provided.
    """
    # Gradio passes None on an empty submission; transform(None) would crash.
    if image is None:
        return "Please upload an image."
    img = transform(image).unsqueeze(0).to(device)  # add batch dimension
    with torch.no_grad():  # inference only — skip autograd bookkeeping
        outputs = model(img)
        _, predicted = torch.max(outputs, 1)  # index of the highest logit
    return f"Predicted: {classes[predicted.item()]}"
|
|
|
|
|
|
|
|
|
# Gradio UI: one image input wired to the text prediction above.
_ui_config = dict(
    fn=predict,
    inputs=gr.Image(type="pil"),
    outputs="text",
    title="Japanese Hiragana Classifier",
    description="Upload an image of a handwritten Hiragana character and get its predicted syllable.",
)
interface = gr.Interface(**_ui_config)


if __name__ == "__main__":
    interface.launch()
|
|
|
|