TatarTokenizers / best_models.json
ArabovMK's picture
Upload best_models.json with huggingface_hub
d9aca72 verified
{
"bpe": {
"best_run": "v8000_mf2",
"out_dir": "results\\bpe\\v8000_mf2",
"metrics": {
"oov_rate": 0.0,
"avg_sequence_length": 96.0113,
"avg_processing_time_ms": 0.19588143825531007,
"compression_ratio": 96.0113,
"total_tokens_evaluated": 1920226,
"unk_count": 0,
"train_time_s": 105.87230825424194,
"config": {
"vocab_size": 8000,
"min_frequency": 2,
"continuing_subword_prefix": "##"
}
},
"score": 63.712785797135034
},
"wordpiece": {
"best_run": "v8000_mf1",
"out_dir": "results\\wordpiece\\v8000_mf1",
"metrics": {
"oov_rate": 0.0,
"avg_sequence_length": 95.39795,
"avg_processing_time_ms": 31.364226222038273,
"compression_ratio": 95.39795,
"total_tokens_evaluated": 1907959,
"unk_count": 0,
"train_time_s": 124.3489019870758,
"config": {
"vocab_size": 8000,
"min_frequency": 1
}
},
"score": 63.20955201220989
},
"unigram": {
"best_run": "v16000",
"out_dir": "results\\unigram\\v16000",
"metrics": {
"oov_rate": 0.0,
"avg_sequence_length": 90.8909,
"avg_processing_time_ms": 0.29166127443313594,
"compression_ratio": 90.8909,
"total_tokens_evaluated": 1817818,
"unk_count": 0,
"train_time_s": 614.1360929012299,
"config": {
"vocab_size": 16000
}
},
"score": 60.91625533579062
},
"spm": {
"best_run": "v32000",
"out_dir": "results\\spm_unigram\\v32000",
"metrics": {
"oov_rate": 0.0,
"avg_sequence_length": 86.6945,
"avg_processing_time_ms": 0.1026016116142273,
"compression_ratio": 86.6945,
"total_tokens_evaluated": 1733890,
"unk_count": 0,
"unk_piece_used": "[UNK]",
"train_time_s": 249.83488726615906,
"config": {
"vocab_size": 32000
}
},
"score": 61.78699439108904
}
}