{
"model_type": "kiri-ocr",
"architectures": ["KiriOCR"],
"library_name": "pytorch",
"torch_dtype": "float32",
"hidden_size": 256,
"num_hidden_layers": 7,
"num_attention_heads": 8,
"image_height": 48,
"image_width": 640,
"max_decoder_length": 260,
"vocab_size": 966,
"unk_token": "<unk>",
"collapse_whitespace": true,
"unicode_nfc": true,
"encoder": {
"dim": 256,
"num_layers": 4,
"num_heads": 8,
"feedforward_dim": 1024,
"dropout": 0.15
},
"decoder": {
"enabled": true,
"dim": 256,
"num_layers": 3,
"num_heads": 8,
"feedforward_dim": 1024
},
"ctc": {
"enabled": true,
"fusion_alpha": 0.5
},
"language_model": {
"enabled": true,
"fusion_enabled_eval": true,
"fusion_alpha": 0.35
},
"inference": {
"use_fp16": false,
"use_autocast": false,
"beam_width": 4,
"beam_length_penalty": 0.6,
"eos_logp_bias": 5.0,
"eos_logp_boost": 5.0,
"eos_bias_until_length": 3,
"repeat_last_penalty": 3.0,
"unk_logp_penalty": 2.0,
"max_length_ratio": 1.5,
"max_length_pad": 10,
"memory_max_length_ratio": 0.75
},
"preprocessing": {
"mean": 0.5,
"std": 0.5,
"pad_value": 128
},
"transformers_version": "4.40.0"
}