{
  "inputs": [
    "images"
  ],
  "modules": {
    "lmim_encoder": {
      "config": {
        "args": {
          "depth": 12,
          "embed_dim": 384,
          "img_size": [
            32,
            128
          ],
          "in_chans": 3,
          "mlp_ratio": 4.0,
          "norm_layer_eps": 1e-06,
          "num_heads": 6,
          "patch_size": 4
        }
      },
      "type": "DeepTextRecognition.models.lmim.MAEEncoderModel"
    },
    "text_decoder": {
      "config": {
        "args": {
          "bidirectional": true,
          "dropout": 0.1,
          "hidden_sizes": [
            256,
            256
          ],
          "input_size": 384,
          "num_layers": 2,
          "output_size": 95
        }
      },
      "type": "DeepTextRecognition.BiLSTMModel"
    },
    "tokenizer": {
      "config": {
        "args": {
          "characters": "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
          "max_length": 25
        }
      },
      "type": "DeepTextRecognition.CTCTokenizer"
    }
  },
  "order": [
    "lmim_encoder",
    "text_decoder",
    "tokenizer"
  ],
  "outputs": [
    "tokenizer:labels",
    "lmim_encoder:visual_features"
  ],
  "routing": {
    "lmim_encoder": {
      "inputs": [
        "images"
      ],
      "outputs": [
        "lmim_encoder:visual_features",
        "lmim_encoder:mask",
        "lmim_encoder:ids_restore"
      ]
    },
    "text_decoder": {
      "inputs": [
        "lmim_encoder:visual_features"
      ],
      "outputs": [
        "text_decoder:text_predictions"
      ]
    },
    "tokenizer": {
      "inputs": [
        "text_decoder:text_predictions"
      ],
      "outputs": [
        "tokenizer:labels"
      ]
    }
  }
}