import torch import torch.nn as nn import torch.nn.functional as F import torchvision.transforms as transforms from PIL import Image import numpy as np class CNNModel(nn.Module): def __init__(self, num_classes=10, dropout_rate=0.5): super(CNNModel, self).__init__() # First convolutional block self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1) self.bn1 = nn.BatchNorm2d(32) self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2) # Second convolutional block self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1) self.bn2 = nn.BatchNorm2d(64) self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2) # Third convolutional block self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1) self.bn3 = nn.BatchNorm2d(128) self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2) self.flattened_size = 128 * 3 * 3 # Fully connected layers with dropout self.fc1 = nn.Linear(self.flattened_size, 512) self.dropout1 = nn.Dropout(dropout_rate) self.fc2 = nn.Linear(512, 256) self.dropout2 = nn.Dropout(dropout_rate) self.fc3 = nn.Linear(256, num_classes) def forward(self, x): # First conv block x = self.conv1(x) x = self.bn1(x) x = F.relu(x) x = self.pool1(x) # Second conv block x = self.conv2(x) x = self.bn2(x) x = F.relu(x) x = self.pool2(x) # Third conv block x = self.conv3(x) x = self.bn3(x) x = F.relu(x) x = self.pool3(x) # Flatten for FC layers x = x.view(x.size(0), -1) # Fully connected layers with dropout x = F.relu(self.fc1(x)) x = self.dropout1(x) x = F.relu(self.fc2(x)) x = self.dropout2(x) x = self.fc3(x) return x def load_model(model_path): """Load the trained model""" model = CNNModel() model.load_state_dict(torch.load(model_path, map_location='cpu')) model.eval() return model def predict_image(model, image_path): """Predict digit from image""" # Load and preprocess image transform = transforms.Compose([ transforms.Grayscale(), transforms.Resize((28, 28)), transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,)) ]) image = Image.open(image_path) image_tensor = transform(image).unsqueeze(0) # Make prediction with torch.no_grad(): outputs = model(image_tensor) _, predicted = torch.max(outputs, 1) shifted_prediction = predicted.item() # Convert back to original digit original_digit = 9 - shifted_prediction return original_digit, shifted_prediction # Example usage: # model = load_model('pytorch_model.bin') # original, shifted = predict_image(model, 'your_image.png') # print(f"Original digit: {original}, Shifted prediction: {shifted}")