File size: 1,443 Bytes
cba2240
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import torch
import torch.nn as nn
from transformers import PreTrainedModel
from transformers.modeling_outputs import SequenceClassifierOutput
from .configuration_captcha import CaptchaConfig

class CaptchaCRNN(PreTrainedModel):
    config_class = CaptchaConfig

    def __init__(self, config):
        super().__init__(config)
        self.conv_layer = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.SiLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.SiLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.SiLU(),
            nn.MaxPool2d(kernel_size=(2, 1)),
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.SiLU()
        )
        self.lstm = nn.LSTM(input_size=1280, hidden_size=256, bidirectional=True, batch_first=True)
        self.classifier = nn.Linear(512, config.num_chars)
        self.post_init()

    def forward(self, x, labels=None):
        x = self.conv_layer(x)
        x = x.permute(0, 3, 1, 2)
        batch, width, channels, height = x.size()
        x = x.view(batch, width, -1)
        x, _ = self.lstm(x)
        logits = self.classifier(x)
        
        return SequenceClassifierOutput(logits=logits)