MedusaBitNet-2B-4T / benchmark_headtohead.json
parrishcorcoran's picture
Upload benchmark_headtohead.json with huggingface_hub
0082594 verified
{
"results": [
{
"name": "BitNet b1.58 2B-4T (I2_S)",
"model_path": "/home/cpinchington/MedusaBitNet/models/bitnet-b1.58-2B-4T/ggml-model-i2_s.gguf",
"model_size_mb": 1187.310112,
"n_runs": 8,
"avg_gen_tok_s": 72.73875,
"avg_prefill_tok_s": 434.95375,
"avg_ms_per_tok": 13.75375,
"runs": [
{
"gen_tok_s": 75.47,
"gen_ms_per_tok": 13.25,
"prefill_tok_s": 473.97,
"wall_time": 3.8744120597839355
},
{
"gen_tok_s": 74.61,
"gen_ms_per_tok": 13.4,
"prefill_tok_s": 400.33,
"wall_time": 3.946519613265991
},
{
"gen_tok_s": 73.39,
"gen_ms_per_tok": 13.63,
"prefill_tok_s": 468.56,
"wall_time": 3.981218099594116
},
{
"gen_tok_s": 71.47,
"gen_ms_per_tok": 13.99,
"prefill_tok_s": 403.88,
"wall_time": 4.082836866378784
},
{
"gen_tok_s": 71.75,
"gen_ms_per_tok": 13.94,
"prefill_tok_s": 425.03,
"wall_time": 4.0725319385528564
},
{
"gen_tok_s": 71.9,
"gen_ms_per_tok": 13.91,
"prefill_tok_s": 399.68,
"wall_time": 4.0745580196380615
},
{
"gen_tok_s": 71.23,
"gen_ms_per_tok": 14.04,
"prefill_tok_s": 466.73,
"wall_time": 4.097683429718018
},
{
"gen_tok_s": 72.09,
"gen_ms_per_tok": 13.87,
"prefill_tok_s": 441.45,
"wall_time": 4.053175687789917
}
]
},
{
"name": "Qwen2.5 1.5B (Q4_K_M)",
"model_path": "/home/cpinchington/MedusaBitNet/models/competing/qwen2.5-1.5b-instruct-q4_k_m.gguf",
"model_size_mb": 1117.320736,
"n_runs": 8,
"avg_gen_tok_s": 88.79125,
"avg_prefill_tok_s": 317.92375,
"avg_ms_per_tok": 11.2625,
"runs": [
{
"gen_tok_s": 88.83,
"gen_ms_per_tok": 11.26,
"prefill_tok_s": 277.6,
"wall_time": 3.4317729473114014
},
{
"gen_tok_s": 88.81,
"gen_ms_per_tok": 11.26,
"prefill_tok_s": 349.13,
"wall_time": 3.419360637664795
},
{
"gen_tok_s": 87.75,
"gen_ms_per_tok": 11.4,
"prefill_tok_s": 362.62,
"wall_time": 3.448280096054077
},
{
"gen_tok_s": 88.93,
"gen_ms_per_tok": 11.24,
"prefill_tok_s": 371.15,
"wall_time": 2.9640591144561768
},
{
"gen_tok_s": 88.83,
"gen_ms_per_tok": 11.26,
"prefill_tok_s": 278.95,
"wall_time": 3.4554200172424316
},
{
"gen_tok_s": 89.49,
"gen_ms_per_tok": 11.17,
"prefill_tok_s": 271.77,
"wall_time": 2.8726541996002197
},
{
"gen_tok_s": 88.94,
"gen_ms_per_tok": 11.24,
"prefill_tok_s": 278.1,
"wall_time": 3.42142915725708
},
{
"gen_tok_s": 88.75,
"gen_ms_per_tok": 11.27,
"prefill_tok_s": 354.07,
"wall_time": 3.4249227046966553
}
]
},
{
"name": "Llama 3.2 1B (Q4_K_M)",
"model_path": "/home/cpinchington/MedusaBitNet/models/competing/Llama-3.2-1B-Instruct-Q4_K_M.gguf",
"model_size_mb": 807.694464,
"n_runs": 8,
"avg_gen_tok_s": 115.94624999999999,
"avg_prefill_tok_s": 440.73375,
"avg_ms_per_tok": 8.62625,
"runs": [
{
"gen_tok_s": 115.23,
"gen_ms_per_tok": 8.68,
"prefill_tok_s": 373.11,
"wall_time": 3.3805642127990723
},
{
"gen_tok_s": 115.66,
"gen_ms_per_tok": 8.65,
"prefill_tok_s": 499.09,
"wall_time": 3.3459088802337646
},
{
"gen_tok_s": 115.25,
"gen_ms_per_tok": 8.68,
"prefill_tok_s": 505.72,
"wall_time": 3.353907346725464
},
{
"gen_tok_s": 116.84,
"gen_ms_per_tok": 8.56,
"prefill_tok_s": 324.38,
"wall_time": 3.3358867168426514
},
{
"gen_tok_s": 115.81,
"gen_ms_per_tok": 8.63,
"prefill_tok_s": 531.45,
"wall_time": 3.3553287982940674
},
{
"gen_tok_s": 116.16,
"gen_ms_per_tok": 8.61,
"prefill_tok_s": 524.34,
"wall_time": 3.340409278869629
},
{
"gen_tok_s": 117.13,
"gen_ms_per_tok": 8.54,
"prefill_tok_s": 341.84,
"wall_time": 3.348862648010254
},
{
"gen_tok_s": 115.49,
"gen_ms_per_tok": 8.66,
"prefill_tok_s": 425.94,
"wall_time": 3.349743366241455
}
]
},
{
"name": "Gemma 2 2B (Q4_K_M)",
"model_path": "/home/cpinchington/MedusaBitNet/models/competing/gemma-2-2b-it-Q4_K_M.gguf",
"model_size_mb": 1708.582752,
"n_runs": 8,
"avg_gen_tok_s": 50.53125,
"avg_prefill_tok_s": 200.96,
"avg_ms_per_tok": 19.7875,
"runs": [
{
"gen_tok_s": 50.47,
"gen_ms_per_tok": 19.81,
"prefill_tok_s": 184.03,
"wall_time": 5.728861331939697
},
{
"gen_tok_s": 50.67,
"gen_ms_per_tok": 19.73,
"prefill_tok_s": 229.99,
"wall_time": 5.6800384521484375
},
{
"gen_tok_s": 50.37,
"gen_ms_per_tok": 19.85,
"prefill_tok_s": 165.91,
"wall_time": 5.7390992641448975
},
{
"gen_tok_s": 50.7,
"gen_ms_per_tok": 19.72,
"prefill_tok_s": 174.06,
"wall_time": 5.694071292877197
},
{
"gen_tok_s": 50.63,
"gen_ms_per_tok": 19.75,
"prefill_tok_s": 190.64,
"wall_time": 5.7339208126068115
},
{
"gen_tok_s": 50.33,
"gen_ms_per_tok": 19.87,
"prefill_tok_s": 253.86,
"wall_time": 5.723286867141724
},
{
"gen_tok_s": 50.52,
"gen_ms_per_tok": 19.79,
"prefill_tok_s": 233.19,
"wall_time": 5.69863224029541
},
{
"gen_tok_s": 50.56,
"gen_ms_per_tok": 19.78,
"prefill_tok_s": 176.0,
"wall_time": 5.730913162231445
}
]
}
],
"hardware": "AMD Ryzen AI MAX+ 395 (Strix Halo)",
"threads": 16
}