farbodpya committed (verified)
Commit: d7edaf2
Parent: f420ab0

Rename modeling_persianocr.py to model.py

Files changed (1):
  modeling_persianocr.py → model.py (+18 −35)
modeling_persianocr.py → model.py RENAMED
@@ -1,9 +1,10 @@
-import torch
 import torch.nn as nn
-from transformers import PreTrainedModel, PretrainedConfig
+import torch
 
-def GN(c, groups=16):
-    return nn.GroupNorm(min(groups, c), c)
+# -----------------------------
+# 3️⃣ Model definition
+# -----------------------------
+def GN(c, groups=16): return nn.GroupNorm(min(groups, c), c)
 
 class LightResNetCNN(nn.Module):
     def __init__(self, in_channels=1, adaptive_height=8):
@@ -15,8 +16,7 @@ class LightResNetCNN(nn.Module):
         self.layer4 = nn.Sequential(nn.Conv2d(128, 256, 3, 1, 1), GN(256), nn.ReLU())
         self.layer5 = nn.Sequential(nn.Conv2d(256, 256, 3, 1, 1), GN(256), nn.ReLU())
         self.layer6 = nn.Sequential(nn.Conv2d(256, 128, 3, 1, 1), GN(128), nn.ReLU())
-        self.adaptive_pool = nn.AdaptiveAvgPool2d((adaptive_height, None))
-
+        self.adaptive_pool = nn.AdaptiveAvgPool2d((self.adaptive_height, None))
     def forward(self, x):
         for i in range(1, 7):
             x = getattr(self, f"layer{i}")(x)
@@ -32,40 +32,23 @@ class PositionalEncoding(nn.Module):
         pe[:, 0::2] = torch.sin(position * div_term)
         pe[:, 1::2] = torch.cos(position * div_term)
         self.register_buffer("pe", pe.unsqueeze(0))
-
     def forward(self, x):
         return x + self.pe[:, :x.size(1), :]
 
-class PersianOCRConfig(PretrainedConfig):
-    model_type = "persianocr"
-
-    def __init__(self, num_classes=100, d_model=1280, nhead=16, num_layers=8, dropout=0.2, adaptive_height=8, **kwargs):
-        super().__init__(**kwargs)
-        self.num_classes = num_classes
-        self.d_model = d_model
-        self.nhead = nhead
-        self.num_layers = num_layers
-        self.dropout = dropout
-        self.adaptive_height = adaptive_height
-
-class PersianOCRModel(PreTrainedModel):
-    config_class = PersianOCRConfig
-
-    def __init__(self, config):
-        super().__init__(config)
-        self.cnn = LightResNetCNN(in_channels=1, adaptive_height=config.adaptive_height)
-        self.proj = nn.Linear(128 * config.adaptive_height, config.d_model)
-        self.posenc = PositionalEncoding(config.d_model)
-        encoder_layer = nn.TransformerEncoderLayer(config.d_model, config.nhead, batch_first=True, dropout=config.dropout)
-        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=config.num_layers)
-        self.fc = nn.Linear(config.d_model, config.num_classes)
-        self.post_init()
-
-    def forward(self, x, labels=None):
+class CNN_Transformer_OCR(nn.Module):
+    def __init__(self, num_classes, d_model=1280, nhead=16, num_layers=8, dropout=0.2):
+        super().__init__()
+        self.cnn = LightResNetCNN(in_channels=1, adaptive_height=8)
+        self.proj = nn.Linear(128 * 8, d_model)
+        self.posenc = PositionalEncoding(d_model)
+        encoder_layer = nn.TransformerEncoderLayer(d_model, nhead, batch_first=True, dropout=dropout)
+        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
+        self.fc = nn.Linear(d_model, num_classes)
+    def forward(self, x):
         f = self.cnn(x)
         B, C, H, W = f.size()
         f = f.permute(0, 3, 1, 2).reshape(B, W, C * H)
         f = self.posenc(self.proj(f))
         out = self.transformer(f)
-        logits = self.fc(out)
-        return {"logits": logits}
+        out = self.fc(out)
+        return out.log_softmax(2)
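After the rename, model.py exposes a plain nn.Module rather than the transformers PreTrainedModel/PretrainedConfig pair that modeling_persianocr.py defined, so it is loaded with ordinary PyTorch tooling instead of the transformers auto classes. Below is a minimal smoke-test sketch, not part of the commit: num_classes=100 and the 64×256 input size are placeholder assumptions, and in practice num_classes must match the checkpoint's character vocabulary.

import torch
from model import CNN_Transformer_OCR

model = CNN_Transformer_OCR(num_classes=100).eval()  # placeholder vocab size

# The CNN stem is built with in_channels=1, so inputs are grayscale
# line images shaped (batch, 1, height, width); sizes here are arbitrary.
dummy = torch.randn(2, 1, 64, 256)
with torch.no_grad():
    log_probs = model(dummy)

# forward() returns (batch, time_steps, num_classes) log-probabilities,
# where time_steps is the width axis of the pooled CNN feature map.
print(log_probs.shape)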
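The new forward ends in log_softmax over the class dimension, which is the shape CTC-style sequence training expects. The commit does not include a training loop; the sketch below shows one plausible pairing with torch.nn.CTCLoss, reusing model and dummy from the sketch above and assuming class index 0 is reserved for the CTC blank (an assumption the diff does not specify).

import torch
import torch.nn as nn

ctc_loss = nn.CTCLoss(blank=0, zero_infinity=True)  # blank id is assumed

log_probs = model(dummy).permute(1, 0, 2)  # CTCLoss wants (T, B, num_classes)
T, B = log_probs.size(0), log_probs.size(1)

# Dummy targets: label ids 1..99 (0 kept for blank), 20 characters per sample.
targets = torch.randint(1, 100, (B, 20))
input_lengths = torch.full((B,), T, dtype=torch.long)
target_lengths = torch.full((B,), 20, dtype=torch.long)

loss = ctc_loss(log_probs, targets, input_lengths, target_lengths)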