| { | |
| "bpe": { | |
| "best_run": "v8000_mf2", | |
| "out_dir": "results\\bpe\\v8000_mf2", | |
| "metrics": { | |
| "oov_rate": 0.0, | |
| "avg_sequence_length": 96.0113, | |
| "avg_processing_time_ms": 0.19588143825531007, | |
| "compression_ratio": 96.0113, | |
| "total_tokens_evaluated": 1920226, | |
| "unk_count": 0, | |
| "train_time_s": 105.87230825424194, | |
| "config": { | |
| "vocab_size": 8000, | |
| "min_frequency": 2, | |
| "continuing_subword_prefix": "##" | |
| } | |
| }, | |
| "score": 63.712785797135034 | |
| }, | |
| "wordpiece": { | |
| "best_run": "v8000_mf1", | |
| "out_dir": "results\\wordpiece\\v8000_mf1", | |
| "metrics": { | |
| "oov_rate": 0.0, | |
| "avg_sequence_length": 95.39795, | |
| "avg_processing_time_ms": 31.364226222038273, | |
| "compression_ratio": 95.39795, | |
| "total_tokens_evaluated": 1907959, | |
| "unk_count": 0, | |
| "train_time_s": 124.3489019870758, | |
| "config": { | |
| "vocab_size": 8000, | |
| "min_frequency": 1 | |
| } | |
| }, | |
| "score": 63.20955201220989 | |
| }, | |
| "unigram": { | |
| "best_run": "v16000", | |
| "out_dir": "results\\unigram\\v16000", | |
| "metrics": { | |
| "oov_rate": 0.0, | |
| "avg_sequence_length": 90.8909, | |
| "avg_processing_time_ms": 0.29166127443313594, | |
| "compression_ratio": 90.8909, | |
| "total_tokens_evaluated": 1817818, | |
| "unk_count": 0, | |
| "train_time_s": 614.1360929012299, | |
| "config": { | |
| "vocab_size": 16000 | |
| } | |
| }, | |
| "score": 60.91625533579062 | |
| }, | |
| "spm": { | |
| "best_run": "v32000", | |
| "out_dir": "results\\spm_unigram\\v32000", | |
| "metrics": { | |
| "oov_rate": 0.0, | |
| "avg_sequence_length": 86.6945, | |
| "avg_processing_time_ms": 0.1026016116142273, | |
| "compression_ratio": 86.6945, | |
| "total_tokens_evaluated": 1733890, | |
| "unk_count": 0, | |
| "unk_piece_used": "[UNK]", | |
| "train_time_s": 249.83488726615906, | |
| "config": { | |
| "vocab_size": 32000 | |
| } | |
| }, | |
| "score": 61.78699439108904 | |
| } | |
| } |