sentinel-universal-tokenizer / benchmark_results.json
5dimension's picture
🦴 v2.0: 65K text vocab, 30 languages, 300K+ samples
3824578 verified
{
"summary": {
"Sentinel-v2": {
"compress": 4.3427,
"fertility": 10.5022,
"vocab": 94208,
"efficiency": 0.046097
},
"GPT-2": {
"compress": 2.4381,
"fertility": 28.8158,
"vocab": 50257,
"efficiency": 0.048513
},
"Gemma": {
"compress": 5.3287,
"fertility": 8.348,
"vocab": 256000,
"efficiency": 0.020815
},
"Qwen2": {
"compress": 4.3289,
"fertility": 10.4499,
"vocab": 151936,
"efficiency": 0.028491
}
}
}