{
  "data": {
    "dev": [
      "dev_en_mul",
      "dev_es_mul",
      "dev_fr_mul",
      "dev_it_mul",
      "dev_ky_mul",
      "dev_ru_mul",
      "dev_nl_mul",
      "dev_tt_mul",
      "dev_tr_mul",
      "dev_sv-SE_mul"
    ],
    "test": [
      "test_en_mul",
      "test_es_mul",
      "test_fr_mul",
      "test_it_mul",
      "test_ky_mul",
      "test_ru_mul",
      "test_nl_mul",
      "test_tt_mul",
      "test_tr_mul",
      "test_sv-SE_mul"
    ],
    "filter": "10:1200"
  },
  "tokenizer": {
    "type": "LexiconTokenizer",
    "option-init": {
      "lexicon": "dict/ten/lexicon_mul10.txt"
    },
    "file": "dict/ten/tokenizer_mul10.tknz",
    "|V|": 75
  },
  "env": {
    "CUDA_VISIBLE_DEVICES": "0,1,2,3,4,5,6,7,8,9"
  },
  "train": {
    "bin": "cat.ctc.train",
    "option": {
      "amp": true,
      "batch_size": 320,
      "grad_norm": 5.0,
      "grad_accum_fold": 2,
      "check_freq": 500,
      "large-dataset": true,
      "ld": "data/*/{11_1000,1001_1200}/*.tar"
    }
  },
  "inference": {
    "avgmodel": {
      "mode": "best",
      "num": 3
    },
    "infer": {
      "bin": "cat.ctc.decode",
      "option": {
        "beam_size": 16,
        "nj": 16,
        "store_ark": true
      }
    },
    "er": {
      "per": true
    }
  },
  "commit": "1b77bc21f7a5faac5e47f6bc4445f23ebf0e0633"
}