File size: 1,777 Bytes
d62e6d2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 |
{
"inputs": [
"images"
],
"modules": {
"lmim_encoder": {
"config": {
"args": {
"depth": 12,
"embed_dim": 384,
"img_size": [
32,
128
],
"in_chans": 3,
"mlp_ratio": 4.0,
"norm_layer_eps": 1e-06,
"num_heads": 6,
"patch_size": 4
}
},
"type": "DeepTextRecognition.models.lmim.MAEEncoderModel"
},
"text_decoder": {
"config": {
"args": {
"bidirectional": true,
"dropout": 0.1,
"hidden_sizes": [
256,
256
],
"input_size": 384,
"num_layers": 2,
"output_size": 95
}
},
"type": "DeepTextRecognition.BiLSTMModel"
},
"tokenizer": {
"config": {
"args": {
"characters": "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
"max_length": 25
}
},
"type": "DeepTextRecognition.CTCTokenizer"
}
},
"order": [
"lmim_encoder",
"text_decoder",
"tokenizer"
],
"outputs": [
"tokenizer:labels",
"lmim_encoder:visual_features"
],
"routing": {
"lmim_encoder": {
"inputs": [
"images"
],
"outputs": [
"lmim_encoder:visual_features",
"lmim_encoder:mask",
"lmim_encoder:ids_restore"
]
},
"text_decoder": {
"inputs": [
"lmim_encoder:visual_features"
],
"outputs": [
"text_decoder:text_predictions"
]
},
"tokenizer": {
"inputs": [
"text_decoder:text_predictions"
],
"outputs": [
"tokenizer:labels"
]
}
}
}
|