"""
VGG-style CNN for EMNIST character classification.
See the README for a more detailed description.
The .pth file (weights) for this model will be downloaded from HuggingFace by app.py
It's hosted at https://huggingface.co/compendious/EMNIST-OCR-WEIGHTS/
The file is EMNIST_CNN.pth
Go here to download directly:
https://huggingface.co/compendious/EMNIST-OCR-WEIGHTS/resolve/main/EMNIST_CNN.pth?download=true
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
class ConvBlock(nn.Module):
    """Two 3x3 convolutions with LeakyReLU activations, then max-pooling.

    NOTE: the attribute names (conv1/conv2/pool) are state_dict keys for
    the pretrained EMNIST_CNN.pth weights and must not be renamed.
    """

    def __init__(self, in_channels, out_channels, padding=1, pool_kernel=2, pool_stride=2):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=padding)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=padding)
        self.pool = nn.MaxPool2d(kernel_size=pool_kernel, stride=pool_stride)

    def forward(self, x):
        # LeakyReLU (slope 0.1) rather than ReLU: keeps a small gradient for
        # negative inputs, avoiding "dead" units across the 62 classes.
        out = F.leaky_relu(self.conv1(x), negative_slope=0.1)
        out = F.leaky_relu(self.conv2(out), negative_slope=0.1)
        return self.pool(out)
class EMNIST_VGG(nn.Module):
    """VGG-style CNN for 62-class EMNIST character classification.

    Four ConvBlock stages with batch norm and spatial dropout, followed by a
    512-wide dense layer and a linear classifier head.

    NOTE: every attribute name below is a state_dict key for the pretrained
    EMNIST_CNN.pth weights and must not be renamed.
    """

    def __init__(self, num_classes=62):
        super().__init__()
        # Four conv stages. For the expected 28x28 input (implied by the
        # 256*5*5 flatten size) the spatial side goes 28 -> 14 -> 7 -> 6 -> 5.
        self.conv1 = ConvBlock(in_channels=1, out_channels=32, pool_kernel=2, pool_stride=2)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = ConvBlock(in_channels=32, out_channels=64, pool_stride=2)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = ConvBlock(in_channels=64, out_channels=128, pool_stride=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.conv4 = ConvBlock(in_channels=128, out_channels=256, pool_stride=1)
        self.bn4 = nn.BatchNorm2d(256)
        # Dropout2d zeroes entire feature maps ("spatial" dropout), pushing
        # the network toward redundant channels instead of single activations.
        self.spatial_drop = nn.Dropout2d(p=0.1)
        # No parameters here — just reshaping before the dense head.
        self.flatten = nn.Flatten()
        # Dense head: 512 units (256 was a bottleneck for 62 classes), batch
        # norm for training stability, heavy dropout before the classifier.
        self.fc1 = nn.Linear(256 * 5 * 5, 512)
        self.bn_fc = nn.BatchNorm1d(512)
        self.dropout = nn.Dropout(p=0.5)
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, x):
        # Conv stages; spatial dropout is applied only after stages 2 and 3.
        x = self.bn1(self.conv1(x))
        x = self.spatial_drop(self.bn2(self.conv2(x)))
        x = self.spatial_drop(self.bn3(self.conv3(x)))
        x = self.bn4(self.conv4(x))
        # Dense head: flatten -> fc1 -> BN -> LeakyReLU -> dropout -> logits.
        x = self.flatten(x)
        x = self.dropout(F.leaky_relu(self.bn_fc(self.fc1(x)), negative_slope=0.1))
        return self.fc2(x)
|