FinOS-Internship
/

shifted-mnist-cnn

Image Classification

computer-vision

Eval Results (legacy)

Model card Files Files and versions

shifted-mnist-cnn / inference.py

felix2703's picture

Add Inference script

19f67df verified 4 months ago

history blame contribute delete

3.05 kB

	import torch
	import torch.nn as nn
	import torch.nn.functional as F
	import torchvision.transforms as transforms
	from PIL import Image
	import numpy as np

	class CNNModel(nn.Module):
	    """Three-stage convolutional classifier with a three-layer dense head.

	    Every stage applies Conv2d (3x3, padding 1) -> BatchNorm2d -> ReLU ->
	    MaxPool2d (2x2, stride 2); channel widths go 1 -> 32 -> 64 -> 128.
	    The head maps the flattened features through 512 and 256 hidden units
	    (each followed by dropout) to ``num_classes`` raw logits.

	    ``flattened_size`` is fixed at 128 * 3 * 3, i.e. a 3x3 feature map after
	    the three poolings, which is what a 28x28 input produces
	    (28 -> 14 -> 7 -> 3).

	    Args:
	        num_classes: Number of output logits.
	        dropout_rate: Drop probability shared by both dropout layers.
	    """

	    def __init__(self, num_classes=10, dropout_rate=0.5):
	        super().__init__()

	        # Stage 1: 1 -> 32 channels
	        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
	        self.bn1 = nn.BatchNorm2d(32)
	        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

	        # Stage 2: 32 -> 64 channels
	        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
	        self.bn2 = nn.BatchNorm2d(64)
	        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

	        # Stage 3: 64 -> 128 channels
	        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
	        self.bn3 = nn.BatchNorm2d(128)
	        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)

	        # 128 channels on a 3x3 spatial grid.
	        self.flattened_size = 128 * 3 * 3

	        # Dense head; dropout follows each hidden layer.
	        self.fc1 = nn.Linear(self.flattened_size, 512)
	        self.dropout1 = nn.Dropout(dropout_rate)
	        self.fc2 = nn.Linear(512, 256)
	        self.dropout2 = nn.Dropout(dropout_rate)
	        self.fc3 = nn.Linear(256, num_classes)

	    def forward(self, x):
	        """Return the raw (un-softmaxed) class logits for a batch ``x``."""
	        stages = (
	            (self.conv1, self.bn1, self.pool1),
	            (self.conv2, self.bn2, self.pool2),
	            (self.conv3, self.bn3, self.pool3),
	        )
	        for conv, norm, pool in stages:
	            x = pool(F.relu(norm(conv(x))))

	        # Collapse channel and spatial axes into one feature axis per sample.
	        features = x.view(x.size(0), -1)

	        hidden = self.dropout1(F.relu(self.fc1(features)))
	        hidden = self.dropout2(F.relu(self.fc2(hidden)))
	        return self.fc3(hidden)

	def load_model(model_path):
	    """Build a default CNNModel, load its weights from ``model_path``, and return it in eval mode.

	    The checkpoint is read onto the CPU (``map_location='cpu'``) and must be a
	    state dict accepted by ``CNNModel.load_state_dict``.
	    """
	    net = CNNModel()
	    # NOTE(review): torch.load unpickles the file, so only load checkpoints
	    # that come from a trusted source.
	    state_dict = torch.load(model_path, map_location='cpu')
	    net.load_state_dict(state_dict)
	    # eval() switches the module to inference mode and returns the module itself.
	    return net.eval()

	def predict_image(model, image_path):
	    """Classify the image at ``image_path`` and map the class index to a digit.

	    The image is converted to single-channel grayscale, resized to 28x28,
	    turned into a tensor, normalized with mean 0.1307 / std 0.3081, and passed
	    to ``model`` as a batch of one (shape ``(1, 1, 28, 28)``).

	    Args:
	        model: Network to call on the preprocessed tensor. It is used as-is;
	            this function does not change its train/eval mode.
	        image_path: Location of the image, in any form ``PIL.Image.open`` accepts.

	    Returns:
	        A tuple ``(original_digit, shifted_prediction)`` where
	        ``shifted_prediction`` is the index of the highest-scoring class and
	        ``original_digit`` is ``9 - shifted_prediction``.
	    """
	    transform = transforms.Compose([
	        transforms.Grayscale(),
	        transforms.Resize((28, 28)),
	        transforms.ToTensor(),
	        transforms.Normalize((0.1307,), (0.3081,)),
	    ])

	    # Image.open is lazy and keeps the underlying file open. Running the
	    # transform inside the context manager reads the pixels while the file
	    # is still available and closes the handle right afterwards.
	    with Image.open(image_path) as image:
	        image_tensor = transform(image).unsqueeze(0)

	    # Inference only: no autograd graph is needed.
	    with torch.no_grad():
	        outputs = model(image_tensor)
	    _, predicted = torch.max(outputs, 1)
	    shifted_prediction = predicted.item()

	    # Label mapping used by this script: digit = 9 - predicted class index.
	    original_digit = 9 - shifted_prediction

	    return original_digit, shifted_prediction

	# Example usage:
	# model = load_model('pytorch_model.bin')
	# original, shifted = predict_image(model, 'your_image.png')
	# print(f"Original digit: {original}, Shifted prediction: {shifted}")