File size: 1,671 Bytes
f945760
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
{
    "data": {
        "dev": [
            "dev_en_mul",
            "dev_es_mul",
            "dev_fr_mul",
            "dev_it_mul",
            "dev_ky_mul",
            "dev_ru_mul",
            "dev_nl_mul",
            "dev_tt_mul",
            "dev_tr_mul",
            "dev_sv-SE_mul"
        ],
        "test": [
            "test_en_mul",
            "test_es_mul",
            "test_fr_mul",
            "test_it_mul",
            "test_ky_mul",
            "test_ru_mul",
            "test_nl_mul",
            "test_tt_mul",
            "test_tr_mul",
            "test_sv-SE_mul"
        ],
        "filter": "10:1200"
    },
    "tokenizer": {
        "type": "LexiconTokenizer",
        "option-init": {
            "lexicon": "dict/ten/lexicon_mul10.txt"
        },
        "file": "dict/ten/tokenizer_mul10.tknz",
        "|V|": 75
    },
    "env": {
        "CUDA_VISIBLE_DEVICES": "0,1,2,3,4,5,6,7,8,9"
    },
    "train": {
        "bin": "cat.ctc.train",
        "option": {
            "amp": true,
            "batch_size": 320,
            "grad_norm": 5.0,
            "grad_accum_fold": 2,
            "check_freq": 500,
            "large-dataset": true,
            "ld": "data/*/{11_1000,1001_1200}/*.tar"
        }
    },
    "inference": {
        "avgmodel": {
            "mode": "best",
            "num": 3
        },
        "infer": {
            "bin": "cat.ctc.decode",
            "option": {
                "beam_size": 16,
                "nj": 16,
                "store_ark": true
            }
        },
        "er": {
            "per": true
        }
    },
    "commit": "1b77bc21f7a5faac5e47f6bc4445f23ebf0e0633"
}