| | import torch |
| | import torch.nn as nn |
| | import torch.nn.functional as F |
| | import torchvision.transforms as transforms |
| | from PIL import Image |
| | import numpy as np |
| |
|
class CNNModel(nn.Module):
    """Three-stage convolutional classifier for single-channel images.

    Each stage is ``Conv2d(3x3, padding=1) -> BatchNorm2d -> ReLU ->
    MaxPool2d(2, 2)`` with 32, 64 and 128 output channels respectively.
    The head is ``Linear(1152, 512) -> Linear(512, 256) ->
    Linear(256, num_classes)`` with ReLU and dropout after each of the first
    two linear layers.

    The head is sized for ``128 * 3 * 3`` features, which is what three 2x2
    poolings produce from a 28x28 input (28 -> 14 -> 7 -> 3).

    Args:
        num_classes: Width of the final (logit) layer.
        dropout_rate: Drop probability shared by both dropout layers.
    """

    def __init__(self, num_classes: int = 10, dropout_rate: float = 0.5):
        super().__init__()

        # Stage 1: 1 -> 32 channels, spatial size halved by the pool.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 2: 32 -> 64 channels.
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 3: 64 -> 128 channels.
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Number of features entering the first linear layer.
        self.flattened_size = 128 * 3 * 3

        # Fully connected classifier head.
        self.fc1 = nn.Linear(self.flattened_size, 512)
        self.dropout1 = nn.Dropout(dropout_rate)
        self.fc2 = nn.Linear(512, 256)
        self.dropout2 = nn.Dropout(dropout_rate)
        self.fc3 = nn.Linear(256, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the raw class scores (no softmax) for a batch ``x``.

        Args:
            x: Batch of single-channel images, ``(batch, 1, H, W)``. ``H`` and
                ``W`` must reduce to 3x3 after the three poolings for the
                linear head to accept the features.

        Returns:
            Tensor of shape ``(batch, num_classes)``.
        """
        x = self.pool1(F.relu(self.bn1(self.conv1(x))))
        x = self.pool2(F.relu(self.bn2(self.conv2(x))))
        x = self.pool3(F.relu(self.bn3(self.conv3(x))))

        # flatten() falls back to a copy when the activations are not laid
        # out contiguously (e.g. channels_last), where .view() would raise.
        x = torch.flatten(x, start_dim=1)

        x = self.dropout1(F.relu(self.fc1(x)))
        x = self.dropout2(F.relu(self.fc2(x)))
        return self.fc3(x)
| |
|
def load_model(model_path):
    """Create a default ``CNNModel`` and restore its weights from ``model_path``.

    The checkpoint is deserialized onto the CPU, loaded into the network as a
    state dict, and the network is returned in evaluation mode.
    """
    network = CNNModel()
    # NOTE(review): torch.load unpickles the file, so only checkpoints from a
    # trusted source should be passed here.
    state_dict = torch.load(model_path, map_location='cpu')
    network.load_state_dict(state_dict)
    # eval() returns the module itself, so it can be returned directly.
    return network.eval()
| |
|
def predict_image(model, image_path):
    """Classify the image stored at ``image_path`` with ``model``.

    The image is converted to grayscale, resized to 28x28, turned into a
    tensor, normalized with mean 0.1307 / std 0.3081, and passed to the model
    as a batch of one with gradients disabled.

    Args:
        model: Callable network that returns per-class scores with the class
            axis at dimension 1.
        image_path: Path or file object accepted by ``PIL.Image.open``.

    Returns:
        A tuple ``(original_digit, shifted_prediction)``, where
        ``shifted_prediction`` is the index of the highest-scoring class and
        ``original_digit`` is ``9 - shifted_prediction``.
    """
    transform = transforms.Compose([
        transforms.Grayscale(),
        transforms.Resize((28, 28)),
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ])

    # Image.open keeps the underlying file open; the context manager closes it
    # as soon as the pixel data has been converted into a tensor.
    with Image.open(image_path) as image:
        image_tensor = transform(image).unsqueeze(0)  # add the batch axis

    with torch.no_grad():
        outputs = model(image_tensor)
    _, predicted = torch.max(outputs, 1)
    shifted_prediction = predicted.item()

    # NOTE(review): presumably the model was trained on labels stored as
    # 9 - digit, so the mapping is undone here; this assumes 10 classes.
    # Confirm against the training code.
    original_digit = 9 - shifted_prediction

    return original_digit, shifted_prediction
| |
|
| | |
| | |
| | |
| | |
| |
|