| import torch | |
| import torch.nn as nn | |
| from transformers import PreTrainedModel | |
| from transformers.modeling_outputs import SequenceClassifierOutput | |
| from .configuration_captcha import CaptchaConfig | |
| class CaptchaCRNN(PreTrainedModel): | |
| config_class = CaptchaConfig | |
| def __init__(self, config): | |
| super().__init__(config) | |
| self.conv_layer = nn.Sequential( | |
| nn.Conv2d(1, 32, kernel_size=3, padding=1), | |
| nn.BatchNorm2d(32), | |
| nn.SiLU(), | |
| nn.MaxPool2d(2, 2), | |
| nn.Conv2d(32, 64, kernel_size=3, padding=1), | |
| nn.BatchNorm2d(64), | |
| nn.SiLU(), | |
| nn.MaxPool2d(2, 2), | |
| nn.Conv2d(64, 128, kernel_size=3, padding=1), | |
| nn.BatchNorm2d(128), | |
| nn.SiLU(), | |
| nn.MaxPool2d(kernel_size=(2, 1)), | |
| nn.Conv2d(128, 256, kernel_size=3, padding=1), | |
| nn.BatchNorm2d(256), | |
| nn.SiLU() | |
| ) | |
| self.lstm = nn.LSTM(input_size=1280, hidden_size=256, bidirectional=True, batch_first=True) | |
| self.classifier = nn.Linear(512, config.num_chars) | |
| self.post_init() | |
| def forward(self, x, labels=None): | |
| x = self.conv_layer(x) | |
| x = x.permute(0, 3, 1, 2) | |
| batch, width, channels, height = x.size() | |
| x = x.view(batch, width, -1) | |
| x, _ = self.lstm(x) | |
| logits = self.classifier(x) | |
| return SequenceClassifierOutput(logits=logits) |