# shifted-mnist-cnn / inference.py
# Uploaded by felix2703 ("Add Inference script", commit 19f67df, verified).
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from PIL import Image
import numpy as np
class CNNModel(nn.Module):
    """Three-stage convolutional classifier with a three-layer dense head.

    Every stage applies Conv2d(3x3, padding=1) -> BatchNorm2d -> ReLU ->
    MaxPool2d(2, stride=2); the channel widths go 1 -> 32 -> 64 -> 128.
    The head is Linear(1152, 512) -> Linear(512, 256) ->
    Linear(256, num_classes), with ReLU and dropout after each of the first
    two linear layers.

    ``flattened_size`` is fixed at 128 * 3 * 3, which is what a 1x28x28 input
    produces after the three poolings (28 -> 14 -> 7 -> 3).

    Args:
        num_classes: Width of the final (logit) layer.
        dropout_rate: Drop probability shared by both dropout layers.
    """

    def __init__(self, num_classes=10, dropout_rate=0.5):
        super().__init__()

        # Stage 1: 1 -> 32 channels.
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.pool1 = nn.MaxPool2d(2, 2)

        # Stage 2: 32 -> 64 channels.
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.pool2 = nn.MaxPool2d(2, 2)

        # Stage 3: 64 -> 128 channels.
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.pool3 = nn.MaxPool2d(2, 2)

        # Number of features handed to the dense head (128 maps of 3x3).
        self.flattened_size = 128 * 3 * 3

        # Dense head; dropout follows each hidden layer.
        self.fc1 = nn.Linear(self.flattened_size, 512)
        self.dropout1 = nn.Dropout(p=dropout_rate)
        self.fc2 = nn.Linear(512, 256)
        self.dropout2 = nn.Dropout(p=dropout_rate)
        self.fc3 = nn.Linear(256, num_classes)

    def forward(self, x):
        """Return the raw class logits for a batch of single-channel images."""
        stages = (
            (self.conv1, self.bn1, self.pool1),
            (self.conv2, self.bn2, self.pool2),
            (self.conv3, self.bn3, self.pool3),
        )
        for conv, norm, pool in stages:
            x = pool(F.relu(norm(conv(x))))

        # Collapse everything except the batch axis into one feature vector.
        features = x.view(x.size(0), -1)

        hidden = self.dropout1(F.relu(self.fc1(features)))
        hidden = self.dropout2(F.relu(self.fc2(hidden)))
        return self.fc3(hidden)
def load_model(model_path):
    """Create a default ``CNNModel``, restore its weights, and return it in eval mode.

    Args:
        model_path: Location of the saved state dict, passed straight to
            ``torch.load``.

    Returns:
        The ``CNNModel`` instance with the loaded parameters, switched to
        evaluation mode.
    """
    network = CNNModel()

    # Tensors are mapped onto the CPU regardless of the device they were saved from.
    # NOTE(review): torch.load unpickles the file, so only load checkpoints
    # from a trusted source (or consider weights_only=True where the
    # installed PyTorch version supports it).
    state_dict = torch.load(model_path, map_location='cpu')
    network.load_state_dict(state_dict)

    # eval() switches the module to evaluation mode and returns the module itself.
    return network.eval()
def predict_image(model, image_path):
    """Predict the digit shown in the image stored at ``image_path``.

    The image is converted to grayscale, resized to 28x28, turned into a
    tensor, normalized, and run through ``model`` as a batch of one. The
    model's arg-max class index is the "shifted" label; the original digit
    is recovered as ``9 - shifted``.

    Args:
        model: Callable module that maps a ``(1, 1, 28, 28)`` float tensor to
            per-class scores. It is used as given; this function does not
            change its train/eval mode.
        image_path: Path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        A tuple ``(original_digit, shifted_prediction)`` of Python ints.
    """
    transform = transforms.Compose([
        transforms.Grayscale(),
        transforms.Resize((28, 28)),
        transforms.ToTensor(),
        # Presumably the MNIST mean / std used at training time -- confirm
        # against the training script.
        transforms.Normalize((0.1307,), (0.3081,)),
    ])

    # Image.open is lazy and keeps the underlying file open; doing the
    # transform inside the context manager forces the pixel data to be read
    # and guarantees the file handle is released afterwards.
    with Image.open(image_path) as image:
        image_tensor = transform(image).unsqueeze(0)  # add the batch axis

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model(image_tensor)

    shifted_prediction = outputs.argmax(dim=1).item()

    # Convert the shifted label back to the original digit.
    original_digit = 9 - shifted_prediction
    return original_digit, shifted_prediction
# Example usage:
# model = load_model('pytorch_model.bin')
# original, shifted = predict_image(model, 'your_image.png')
# print(f"Original digit: {original}, Shifted prediction: {shifted}")