| { |
| "results": [ |
| { |
| "name": "BitNet b1.58 2B-4T (I2_S)", |
| "model_path": "/home/cpinchington/MedusaBitNet/models/bitnet-b1.58-2B-4T/ggml-model-i2_s.gguf", |
| "model_size_mb": 1187.310112, |
| "n_runs": 8, |
| "avg_gen_tok_s": 72.73875, |
| "avg_prefill_tok_s": 434.95375, |
| "avg_ms_per_tok": 13.75375, |
| "runs": [ |
| { |
| "gen_tok_s": 75.47, |
| "gen_ms_per_tok": 13.25, |
| "prefill_tok_s": 473.97, |
| "wall_time": 3.8744120597839355 |
| }, |
| { |
| "gen_tok_s": 74.61, |
| "gen_ms_per_tok": 13.4, |
| "prefill_tok_s": 400.33, |
| "wall_time": 3.946519613265991 |
| }, |
| { |
| "gen_tok_s": 73.39, |
| "gen_ms_per_tok": 13.63, |
| "prefill_tok_s": 468.56, |
| "wall_time": 3.981218099594116 |
| }, |
| { |
| "gen_tok_s": 71.47, |
| "gen_ms_per_tok": 13.99, |
| "prefill_tok_s": 403.88, |
| "wall_time": 4.082836866378784 |
| }, |
| { |
| "gen_tok_s": 71.75, |
| "gen_ms_per_tok": 13.94, |
| "prefill_tok_s": 425.03, |
| "wall_time": 4.0725319385528564 |
| }, |
| { |
| "gen_tok_s": 71.9, |
| "gen_ms_per_tok": 13.91, |
| "prefill_tok_s": 399.68, |
| "wall_time": 4.0745580196380615 |
| }, |
| { |
| "gen_tok_s": 71.23, |
| "gen_ms_per_tok": 14.04, |
| "prefill_tok_s": 466.73, |
| "wall_time": 4.097683429718018 |
| }, |
| { |
| "gen_tok_s": 72.09, |
| "gen_ms_per_tok": 13.87, |
| "prefill_tok_s": 441.45, |
| "wall_time": 4.053175687789917 |
| } |
| ] |
| }, |
| { |
| "name": "Qwen2.5 1.5B (Q4_K_M)", |
| "model_path": "/home/cpinchington/MedusaBitNet/models/competing/qwen2.5-1.5b-instruct-q4_k_m.gguf", |
| "model_size_mb": 1117.320736, |
| "n_runs": 8, |
| "avg_gen_tok_s": 88.79125, |
| "avg_prefill_tok_s": 317.92375, |
| "avg_ms_per_tok": 11.2625, |
| "runs": [ |
| { |
| "gen_tok_s": 88.83, |
| "gen_ms_per_tok": 11.26, |
| "prefill_tok_s": 277.6, |
| "wall_time": 3.4317729473114014 |
| }, |
| { |
| "gen_tok_s": 88.81, |
| "gen_ms_per_tok": 11.26, |
| "prefill_tok_s": 349.13, |
| "wall_time": 3.419360637664795 |
| }, |
| { |
| "gen_tok_s": 87.75, |
| "gen_ms_per_tok": 11.4, |
| "prefill_tok_s": 362.62, |
| "wall_time": 3.448280096054077 |
| }, |
| { |
| "gen_tok_s": 88.93, |
| "gen_ms_per_tok": 11.24, |
| "prefill_tok_s": 371.15, |
| "wall_time": 2.9640591144561768 |
| }, |
| { |
| "gen_tok_s": 88.83, |
| "gen_ms_per_tok": 11.26, |
| "prefill_tok_s": 278.95, |
| "wall_time": 3.4554200172424316 |
| }, |
| { |
| "gen_tok_s": 89.49, |
| "gen_ms_per_tok": 11.17, |
| "prefill_tok_s": 271.77, |
| "wall_time": 2.8726541996002197 |
| }, |
| { |
| "gen_tok_s": 88.94, |
| "gen_ms_per_tok": 11.24, |
| "prefill_tok_s": 278.1, |
| "wall_time": 3.42142915725708 |
| }, |
| { |
| "gen_tok_s": 88.75, |
| "gen_ms_per_tok": 11.27, |
| "prefill_tok_s": 354.07, |
| "wall_time": 3.4249227046966553 |
| } |
| ] |
| }, |
| { |
| "name": "Llama 3.2 1B (Q4_K_M)", |
| "model_path": "/home/cpinchington/MedusaBitNet/models/competing/Llama-3.2-1B-Instruct-Q4_K_M.gguf", |
| "model_size_mb": 807.694464, |
| "n_runs": 8, |
| "avg_gen_tok_s": 115.94624999999999, |
| "avg_prefill_tok_s": 440.73375, |
| "avg_ms_per_tok": 8.62625, |
| "runs": [ |
| { |
| "gen_tok_s": 115.23, |
| "gen_ms_per_tok": 8.68, |
| "prefill_tok_s": 373.11, |
| "wall_time": 3.3805642127990723 |
| }, |
| { |
| "gen_tok_s": 115.66, |
| "gen_ms_per_tok": 8.65, |
| "prefill_tok_s": 499.09, |
| "wall_time": 3.3459088802337646 |
| }, |
| { |
| "gen_tok_s": 115.25, |
| "gen_ms_per_tok": 8.68, |
| "prefill_tok_s": 505.72, |
| "wall_time": 3.353907346725464 |
| }, |
| { |
| "gen_tok_s": 116.84, |
| "gen_ms_per_tok": 8.56, |
| "prefill_tok_s": 324.38, |
| "wall_time": 3.3358867168426514 |
| }, |
| { |
| "gen_tok_s": 115.81, |
| "gen_ms_per_tok": 8.63, |
| "prefill_tok_s": 531.45, |
| "wall_time": 3.3553287982940674 |
| }, |
| { |
| "gen_tok_s": 116.16, |
| "gen_ms_per_tok": 8.61, |
| "prefill_tok_s": 524.34, |
| "wall_time": 3.340409278869629 |
| }, |
| { |
| "gen_tok_s": 117.13, |
| "gen_ms_per_tok": 8.54, |
| "prefill_tok_s": 341.84, |
| "wall_time": 3.348862648010254 |
| }, |
| { |
| "gen_tok_s": 115.49, |
| "gen_ms_per_tok": 8.66, |
| "prefill_tok_s": 425.94, |
| "wall_time": 3.349743366241455 |
| } |
| ] |
| }, |
| { |
| "name": "Gemma 2 2B (Q4_K_M)", |
| "model_path": "/home/cpinchington/MedusaBitNet/models/competing/gemma-2-2b-it-Q4_K_M.gguf", |
| "model_size_mb": 1708.582752, |
| "n_runs": 8, |
| "avg_gen_tok_s": 50.53125, |
| "avg_prefill_tok_s": 200.96, |
| "avg_ms_per_tok": 19.7875, |
| "runs": [ |
| { |
| "gen_tok_s": 50.47, |
| "gen_ms_per_tok": 19.81, |
| "prefill_tok_s": 184.03, |
| "wall_time": 5.728861331939697 |
| }, |
| { |
| "gen_tok_s": 50.67, |
| "gen_ms_per_tok": 19.73, |
| "prefill_tok_s": 229.99, |
| "wall_time": 5.6800384521484375 |
| }, |
| { |
| "gen_tok_s": 50.37, |
| "gen_ms_per_tok": 19.85, |
| "prefill_tok_s": 165.91, |
| "wall_time": 5.7390992641448975 |
| }, |
| { |
| "gen_tok_s": 50.7, |
| "gen_ms_per_tok": 19.72, |
| "prefill_tok_s": 174.06, |
| "wall_time": 5.694071292877197 |
| }, |
| { |
| "gen_tok_s": 50.63, |
| "gen_ms_per_tok": 19.75, |
| "prefill_tok_s": 190.64, |
| "wall_time": 5.7339208126068115 |
| }, |
| { |
| "gen_tok_s": 50.33, |
| "gen_ms_per_tok": 19.87, |
| "prefill_tok_s": 253.86, |
| "wall_time": 5.723286867141724 |
| }, |
| { |
| "gen_tok_s": 50.52, |
| "gen_ms_per_tok": 19.79, |
| "prefill_tok_s": 233.19, |
| "wall_time": 5.69863224029541 |
| }, |
| { |
| "gen_tok_s": 50.56, |
| "gen_ms_per_tok": 19.78, |
| "prefill_tok_s": 176.0, |
| "wall_time": 5.730913162231445 |
| } |
| ] |
| } |
| ], |
| "hardware": "AMD Ryzen AI MAX+ 395 (Strix Halo)", |
| "threads": 16 |
| } |