Spaces:
Running on Zero
Running on Zero
File size: 237,213 Bytes
ee8ca43 | 1 2 | {"source": "LocalScore (localscore.ai) — real community benchmark runs", "workload": "decode tok/s averaged over the LocalScore scenario grid", "generated_at": "2026-06-10T12:25:42+00:00", "count": 1973, "points": [{"bw": 400.0, "tps": 23.64, "params_b": 14.77, "accel": "Apple M3 Max 12P+4E+40GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 400.0, "tps": 147.65, "params_b": 1.5, "accel": "Apple M3 Max 12P+4E+40GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 400.0, "tps": 23.98, "params_b": 14.77, "accel": "Apple M3 Max 12P+4E+40GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 546.0, "tps": 164.72, "params_b": 1.5, "accel": "Apple M4 Max 10P+4E+32GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 546.0, "tps": 165.05, "params_b": 1.5, "accel": "Apple M4 Max 10P+4E+32GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 672.0, "tps": 66.26, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 5070 Ti Laptop GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 672.0, "tps": 171.69, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5070 Ti Laptop GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 672.0, "tps": 172.59, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5070 Ti Laptop GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 672.0, "tps": 64.21, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 5070 Ti Laptop GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 672.0, "tps": 36.13, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 5070 Ti Laptop GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 448.0, "tps": 53.32, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 2070", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 672.0, "tps": 45.69, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 5070", "model": "Qwen2.5 14B Instruct"}, {"bw": 273.0, "tps": 120.79, "params_b": 1.5, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 273.0, "tps": 18.02, "params_b": 14.77, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 800.0, "tps": 36.53, "params_b": 14.77, "accel": "Apple M2 Ultra 16P+8E+76GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 150.0, "tps": 12.28, "params_b": 14.77, "accel": "Apple M3 Pro 6P+6E+18GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 448.0, "tps": 184.75, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5060", "model": "Llama 3.2 1B Instruct"}, {"bw": 1792.0, "tps": 70.02, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 5090", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 448.0, "tps": 51.46, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 2070", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 272.0, "tps": 39.6, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4060", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 936.0, "tps": 68.1, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen2.5 14B Instruct"}, {"bw": 936.0, "tps": 116.96, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3090", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 936.0, "tps": 116.88, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3090", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 936.0, "tps": 366.8, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3090", "model": "Llama 3.2 1B Instruct"}, {"bw": 936.0, "tps": 376.69, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3090", "model": "Llama 3.2 1B Instruct"}, {"bw": 760.0, "tps": 93.3, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3080", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 760.0, "tps": 330.32, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3080", "model": "Llama 3.2 1B Instruct"}, {"bw": 200.0, "tps": 24.48, "params_b": 8.03, "accel": "Apple M2 Pro 6P+4E+16GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 96.0, "tps": 26.56, "params_b": 1.5, "accel": "Quadro P620", "model": "Llama 3.2 1B Instruct"}, {"bw": 96.0, "tps": 26.44, "params_b": 1.5, "accel": "Quadro P620", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 57.39, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 2070 SUPER", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 448.0, "tps": 211.9, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 2070 SUPER", "model": "Llama 3.2 1B Instruct"}, {"bw": 150.0, "tps": 92.58, "params_b": 1.5, "accel": "Apple M3 Pro 6P+6E+18GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 672.0, "tps": 40.33, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4070 Ti SUPER", "model": "Qwen2.5 14B Instruct"}, {"bw": 672.0, "tps": 66.26, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4070 Ti SUPER", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 936.0, "tps": 48.0, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen2.5 14B Instruct"}, {"bw": 936.0, "tps": 48.14, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen2.5 14B Instruct"}, {"bw": 936.0, "tps": 79.7, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3090", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 936.0, "tps": 207.76, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3090", "model": "Llama 3.2 1B Instruct"}, {"bw": 936.0, "tps": 216.0, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3090", "model": "Llama 3.2 1B Instruct"}, {"bw": 400.0, "tps": 92.76, "params_b": 1.5, "accel": "Apple M1 Max 8P+2E+32GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 39.4, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 5060 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 448.0, "tps": 309.0, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5060 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 546.0, "tps": 22.4, "params_b": 14.77, "accel": "Apple M4 Max 10P+4E+32GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 546.0, "tps": 42.21, "params_b": 8.03, "accel": "Apple M4 Max 10P+4E+32GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 546.0, "tps": 162.24, "params_b": 1.5, "accel": "Apple M4 Max 10P+4E+32GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 360.0, "tps": 29.37, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3060", "model": "Qwen2.5 14B Instruct"}, {"bw": 546.0, "tps": 33.7, "params_b": 8.03, "accel": "Apple M4 Max 10P+4E+32GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 546.0, "tps": 170.89, "params_b": 1.5, "accel": "Apple M4 Max 10P+4E+32GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 100.0, "tps": 278.22, "params_b": 1.5, "accel": "Tesla V100-SXM2-16GB", "model": "Llama 3.2 1B Instruct"}, {"bw": 800.0, "tps": 33.59, "params_b": 14.77, "accel": "Apple M2 Ultra 16P+8E+60GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 448.0, "tps": 41.88, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3070 Laptop GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 448.0, "tps": 124.25, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3070 Laptop GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 400.0, "tps": 18.66, "params_b": 14.77, "accel": "Apple M1 Max 8P+2E+32GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 256.0, "tps": 195.5, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4070 Laptop GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 150.0, "tps": 22.23, "params_b": 8.03, "accel": "Apple M3 Pro 5P+6E+14GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 150.0, "tps": 12.18, "params_b": 14.77, "accel": "Apple M3 Pro 5P+6E+14GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 896.0, "tps": 34.72, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5080 Laptop GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 896.0, "tps": 40.47, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 5090 Laptop GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 896.0, "tps": 71.34, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 5090 Laptop GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 896.0, "tps": 182.33, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5090 Laptop GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 672.0, "tps": 70.04, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 5070 Ti Laptop GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 760.0, "tps": 75.45, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3080", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 672.0, "tps": 37.46, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 5070 Ti Laptop GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 760.0, "tps": 31.06, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3080", "model": "Qwen2.5 14B Instruct"}, {"bw": 672.0, "tps": 186.25, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5070 Ti Laptop GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 760.0, "tps": 226.24, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3080", "model": "Llama 3.2 1B Instruct"}, {"bw": 307.0, "tps": 174.69, "params_b": 1.5, "accel": "Apple M5 Pro 6P+12E+20GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 200.0, "tps": 78.46, "params_b": 1.5, "accel": "Apple M1 Pro 8P+2E+16GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 120.0, "tps": 17.12, "params_b": 8.03, "accel": "Apple M4 4P+6E+10GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 200.0, "tps": 29.12, "params_b": 4.45, "accel": "Apple M1 Pro 6P+2E+14GPU", "model": "Phi-4-Mini-Reasoning"}, {"bw": 307.0, "tps": 43.16, "params_b": 8.03, "accel": "Apple M5 Pro 5P+10E+16GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 307.0, "tps": 21.64, "params_b": 14.77, "accel": "Apple M5 Pro 5P+10E+16GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 96.0, "tps": 1.31, "params_b": 8.03, "accel": "Quadro P620", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 96.0, "tps": 18.57, "params_b": 1.5, "accel": "Quadro P620", "model": "Llama 3.2 1B Instruct"}, {"bw": 120.0, "tps": 9.44, "params_b": 14.77, "accel": "Apple M4 4P+6E+10GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 672.0, "tps": 37.09, "params_b": 14.77, "accel": "Quadro RTX 6000", "model": "Qwen2.5 14B Instruct"}, {"bw": 672.0, "tps": 64.86, "params_b": 8.03, "accel": "Quadro RTX 6000", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 448.0, "tps": 32.88, "params_b": 14.77, "accel": "NVIDIA RTX A4000", "model": "Qwen2.5 14B Instruct"}, {"bw": 448.0, "tps": 32.68, "params_b": 14.77, "accel": "NVIDIA RTX A4000", "model": "Qwen2.5 14B Instruct"}, {"bw": 546.0, "tps": 165.29, "params_b": 1.5, "accel": "Apple M4 Max 10P+4E+32GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 307.0, "tps": 23.69, "params_b": 14.77, "accel": "Apple M5 Pro 6P+12E+20GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 100.0, "tps": 65.03, "params_b": 1.5, "accel": "Apple M3 4P+4E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 120.0, "tps": 74.01, "params_b": 1.5, "accel": "Apple M4 4P+6E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 32.76, "params_b": 14.77, "accel": "NVIDIA RTX A4000", "model": "Qwen2.5 14B Instruct"}, {"bw": 448.0, "tps": 33.08, "params_b": 14.77, "accel": "NVIDIA RTX A4000", "model": "Qwen2.5 14B Instruct"}, {"bw": 448.0, "tps": 33.2, "params_b": 14.77, "accel": "NVIDIA RTX A4000", "model": "Qwen2.5 14B Instruct"}, {"bw": 448.0, "tps": 58.91, "params_b": 8.03, "accel": "NVIDIA RTX A4000", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 448.0, "tps": 58.76, "params_b": 8.03, "accel": "NVIDIA RTX A4000", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 448.0, "tps": 32.94, "params_b": 14.77, "accel": "NVIDIA RTX A4000", "model": "Qwen2.5 14B Instruct"}, {"bw": 448.0, "tps": 59.08, "params_b": 8.03, "accel": "NVIDIA RTX A4000", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 448.0, "tps": 32.84, "params_b": 14.77, "accel": "NVIDIA RTX A4000", "model": "Qwen2.5 14B Instruct"}, {"bw": 448.0, "tps": 58.61, "params_b": 8.03, "accel": "NVIDIA RTX A4000", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 360.0, "tps": 70.85, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3060", "model": "Llama 3.2 1B Instruct"}, {"bw": 760.0, "tps": 30.71, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3080", "model": "Qwen2.5 14B Instruct"}, {"bw": 760.0, "tps": 114.39, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3080", "model": "Llama 3.2 1B Instruct"}, {"bw": 760.0, "tps": 52.15, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3080", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 936.0, "tps": 61.92, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen2.5 14B Instruct"}, {"bw": 273.0, "tps": 84.42, "params_b": 1.5, "accel": "Apple M4 Pro 8P+4E+16GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 896.0, "tps": 14.15, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 5070 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 896.0, "tps": 53.41, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5070 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 504.0, "tps": 22.61, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4070 SUPER", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 912.0, "tps": 44.9, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3080 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 912.0, "tps": 79.24, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3080 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 448.0, "tps": 189.11, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 2070 Super", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 49.89, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 2070", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 760.0, "tps": 92.41, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3080", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 760.0, "tps": 321.97, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3080", "model": "Llama 3.2 1B Instruct"}, {"bw": 504.0, "tps": 13.59, "params_b": 12.25, "accel": "NVIDIA GeForce RTX 4070 Ti", "model": "Mistral Nemo 2407 12B Thinking Claude Gemini GPT5.2 Uncensored HERETIC"}, {"bw": 624.0, "tps": 34.11, "params_b": 8.03, "accel": "AMD Radeon RX 7800 XT", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 624.0, "tps": 116.73, "params_b": 1.5, "accel": "AMD Radeon RX 7800 XT", "model": "Llama 3.2 1B Instruct"}, {"bw": 624.0, "tps": 21.32, "params_b": 14.77, "accel": "AMD Radeon RX 7800 XT", "model": "Qwen2.5 14B Instruct"}, {"bw": 760.0, "tps": 14.45, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3080", "model": "Qwen2.5 14B Instruct"}, {"bw": 760.0, "tps": 208.7, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3080", "model": "Llama 3.2 1B Instruct"}, {"bw": 307.0, "tps": 23.65, "params_b": 14.77, "accel": "Apple M5 Pro 6P+12E+20GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 120.0, "tps": 7.31, "params_b": 14.77, "accel": "Apple M4 4P+6E+8GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 448.0, "tps": 36.37, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 5060 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 448.0, "tps": 215.73, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5060 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 1008.0, "tps": 59.7, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4090", "model": "Qwen2.5 14B Instruct"}, {"bw": 200.0, "tps": 72.26, "params_b": 1.5, "accel": "Apple M1 Pro 6P+2E+14GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 153.0, "tps": 100.66, "params_b": 1.5, "accel": "Apple M5 4P+6E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 153.0, "tps": 100.71, "params_b": 1.5, "accel": "Apple M5 4P+6E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 153.0, "tps": 99.2, "params_b": 1.5, "accel": "Apple M5 4P+6E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 200.0, "tps": 90.96, "params_b": 1.5, "accel": "Apple M2 Pro 6P+4E+16GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 760.0, "tps": 76.4, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3080", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 200.0, "tps": 90.92, "params_b": 1.5, "accel": "Apple M2 Pro 6P+4E+16GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 760.0, "tps": 197.98, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3080", "model": "Llama 3.2 1B Instruct"}, {"bw": 936.0, "tps": 59.83, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen2.5 14B Instruct"}, {"bw": 256.0, "tps": 28.17, "params_b": 4.45, "accel": "AMD Radeon RX 6600 XT", "model": "Phi 4 Mini Instruct"}, {"bw": 256.0, "tps": 18.29, "params_b": 8.19, "accel": "AMD Radeon RX 6600 XT", "model": "Deepseek-R1-0528-Qwen3-8B"}, {"bw": 256.0, "tps": 23.05, "params_b": 7.62, "accel": "AMD Radeon RX 6600 XT", "model": "Qwen2.5.1 Coder 7B Instruct"}, {"bw": 256.0, "tps": 21.89, "params_b": 8.03, "accel": "AMD Radeon RX 6600 XT", "model": "Llama 3.1 8B Instruct Abliterated_Via_Adapter"}, {"bw": 256.0, "tps": 24.65, "params_b": 4.55, "accel": "AMD Radeon RX 6600 XT", "model": "Gemma 3 Finetune"}, {"bw": 256.0, "tps": 26.35, "params_b": 4.41, "accel": "AMD Radeon RX 6600 XT", "model": "Qwen3 4B Instruct 2507"}, {"bw": 120.0, "tps": 66.26, "params_b": 1.5, "accel": "Apple M4 4P+6E+8GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 273.0, "tps": 16.69, "params_b": 14.77, "accel": "Apple M4 Pro 8P+4E+16GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 273.0, "tps": 110.92, "params_b": 1.5, "accel": "Apple M4 Pro 8P+4E+16GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 68.0, "tps": 37.28, "params_b": 1.5, "accel": "Apple M1 4P+4E+7GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 672.0, "tps": 252.7, "params_b": 1.5, "accel": "Quadro RTX 6000", "model": "Llama 3.2 1B Instruct"}, {"bw": 273.0, "tps": 29.52, "params_b": 8.03, "accel": "Apple M4 Pro 8P+4E+16GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 224.0, "tps": 101.11, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3050", "model": "Llama 3.2 1B Instruct"}, {"bw": 224.0, "tps": 25.83, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3050", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 224.0, "tps": 108.0, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3050", "model": "Llama 3.2 1B Instruct"}, {"bw": 224.0, "tps": 107.21, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3050", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 55.3, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 5060 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 400.0, "tps": 20.9, "params_b": 14.77, "accel": "Apple M1 Max 8P+2E+32GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 400.0, "tps": 20.24, "params_b": 14.77, "accel": "Apple M1 Max 8P+2E+24GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 224.0, "tps": 4.72, "params_b": 8.03, "accel": "AMD Radeon RX 6600", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 168.0, "tps": 21.47, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3050 6GB Laptop GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 89.0, "tps": 10.37, "params_b": 1.5, "accel": "AMD Radeon 760M Graphics", "model": "Llama 3.2 1B Instruct"}, {"bw": 168.0, "tps": 90.63, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3050 6GB Laptop GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 224.0, "tps": 11.5, "params_b": 1.5, "accel": "AMD Radeon RX 6600", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 33.23, "params_b": 14.77, "accel": "NVIDIA RTX A4000", "model": "Qwen2.5 14B Instruct"}, {"bw": 120.0, "tps": 76.56, "params_b": 1.5, "accel": "Apple M4 4P+6E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 32.9, "params_b": 14.77, "accel": "NVIDIA RTX A4000", "model": "Qwen2.5 14B Instruct"}, {"bw": 120.0, "tps": 16.4, "params_b": 8.03, "accel": "Apple M4 4P+6E+10GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 614.0, "tps": 37.55, "params_b": 14.77, "accel": "Apple M5 Max 6P+12E+40GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 448.0, "tps": 33.22, "params_b": 14.77, "accel": "NVIDIA RTX A4000", "model": "Qwen2.5 14B Instruct"}, {"bw": 614.0, "tps": 36.16, "params_b": 14.77, "accel": "Apple M5 Max 6P+12E+40GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 273.0, "tps": 110.45, "params_b": 1.5, "accel": "Apple M4 Pro 8P+4E+16GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 400.0, "tps": 153.1, "params_b": 1.5, "accel": "Apple M3 Max 12P+4E+40GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 400.0, "tps": 32.27, "params_b": 8.03, "accel": "Apple M1 Max 8P+2E+32GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 546.0, "tps": 49.62, "params_b": 8.03, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 546.0, "tps": 179.26, "params_b": 1.5, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 640.0, "tps": 44.15, "params_b": 14.77, "accel": "NVIDIA RTX A4500", "model": "Qwen2.5 14B Instruct"}, {"bw": 640.0, "tps": 78.2, "params_b": 8.03, "accel": "NVIDIA RTX A4500", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 640.0, "tps": 288.25, "params_b": 1.5, "accel": "NVIDIA RTX A4500", "model": "Llama 3.2 1B Instruct"}, {"bw": 288.0, "tps": 44.87, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4060 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 68.0, "tps": 9.72, "params_b": 8.03, "accel": "Apple M1 4P+4E+8GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 273.0, "tps": 16.7, "params_b": 14.77, "accel": "Apple M4 Pro 8P+4E+16GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 120.0, "tps": 77.2, "params_b": 1.5, "accel": "Apple M4 4P+6E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 120.0, "tps": 17.8, "params_b": 8.03, "accel": "Apple M4 4P+6E+10GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 120.0, "tps": 9.57, "params_b": 14.77, "accel": "Apple M4 4P+6E+10GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 120.0, "tps": 66.31, "params_b": 1.5, "accel": "Apple M4 4P+6E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 192.0, "tps": 31.17, "params_b": 1.5, "accel": "NVIDIA GeForce GTX 1650 SUPER", "model": "Llama 3.2 1B Instruct"}, {"bw": 960.0, "tps": 32.39, "params_b": 14.77, "accel": "AMD Radeon RX 7900 XTX", "model": "Qwen2.5 14B Instruct"}, {"bw": 112.0, "tps": 44.93, "params_b": 1.5, "accel": "NVIDIA GeForce GTX 1050 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 112.0, "tps": 44.73, "params_b": 1.5, "accel": "NVIDIA GeForce GTX 1050 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 112.0, "tps": 1.16, "params_b": 14.77, "accel": "NVIDIA GeForce GTX 1050 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 273.0, "tps": 17.95, "params_b": 14.77, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 120.0, "tps": 12.98, "params_b": 8.03, "accel": "Apple M4 4P+6E+10GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 120.0, "tps": 73.43, "params_b": 1.5, "accel": "Apple M4 4P+6E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 224.0, "tps": 34.96, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3050 OEM", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 912.0, "tps": 60.19, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3080 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 150.0, "tps": 11.67, "params_b": 14.77, "accel": "Apple M3 Pro 6P+6E+18GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 912.0, "tps": 83.97, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3080 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 896.0, "tps": 171.66, "params_b": 0.62, "accel": "NVIDIA GeForce RTX 5070 Ti", "model": "Qwen1.5-0.5B-Chat-AWQ-fp16"}, {"bw": 546.0, "tps": 26.83, "params_b": 14.77, "accel": "Apple M4 Max 10P+4E+32GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 150.0, "tps": 9.91, "params_b": 14.77, "accel": "Apple M3 Pro 5P+6E+14GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 546.0, "tps": 26.85, "params_b": 14.77, "accel": "Apple M4 Max 10P+4E+32GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 307.0, "tps": 43.74, "params_b": 8.03, "accel": "Apple M5 Pro 5P+10E+16GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 936.0, "tps": 30.55, "params_b": 32.76, "accel": "NVIDIA GeForce RTX 3090", "model": "QwQ 32B"}, {"bw": 936.0, "tps": 58.2, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen2.5 14B Instruct"}, {"bw": 936.0, "tps": 100.76, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3090", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 936.0, "tps": 342.42, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3090", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 58.52, "params_b": 8.03, "accel": "NVIDIA RTX A4000", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 273.0, "tps": 110.38, "params_b": 1.5, "accel": "Apple M4 Pro 8P+4E+16GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 18.78, "params_b": 14.77, "accel": "NVIDIA RTX A4000", "model": "Qwen2.5 14B Instruct"}, {"bw": 128.0, "tps": 21.75, "params_b": 1.5, "accel": "NVIDIA GeForce GTX 1650", "model": "Llama 3.2 1B Instruct"}, {"bw": 936.0, "tps": 58.57, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen2.5 14B Instruct"}, {"bw": 936.0, "tps": 102.71, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3090", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 936.0, "tps": 349.53, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3090", "model": "Llama 3.2 1B Instruct"}, {"bw": 936.0, "tps": 58.37, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen2.5 14B Instruct"}, {"bw": 273.0, "tps": 16.65, "params_b": 14.77, "accel": "Apple M4 Pro 8P+4E+16GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 448.0, "tps": 32.44, "params_b": 14.77, "accel": "NVIDIA RTX A4000", "model": "Qwen2.5 14B Instruct"}, {"bw": 400.0, "tps": 20.74, "params_b": 14.77, "accel": "Apple M1 Max 8P+2E+32GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 400.0, "tps": 124.5, "params_b": 1.5, "accel": "Apple M1 Max 8P+2E+32GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 400.0, "tps": 37.65, "params_b": 8.03, "accel": "Apple M1 Max 8P+2E+32GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 936.0, "tps": 61.6, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen2.5 14B Instruct"}, {"bw": 936.0, "tps": 348.48, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3090", "model": "Llama 3.2 1B Instruct"}, {"bw": 256.0, "tps": 40.29, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4060 Laptop GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 936.0, "tps": 13.92, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen2.5 14B Instruct"}, {"bw": 936.0, "tps": 25.1, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3090", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 936.0, "tps": 45.27, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3090", "model": "Llama 3.2 1B Instruct"}, {"bw": 68.0, "tps": 14.61, "params_b": 8.03, "accel": "Apple M1 4P+4E+8GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 120.0, "tps": 17.4, "params_b": 8.03, "accel": "Apple M4 4P+6E+10GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 896.0, "tps": 12.63, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 5070 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 896.0, "tps": 23.97, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 5070 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 896.0, "tps": 47.73, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5070 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 896.0, "tps": 46.75, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5070 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 1008.0, "tps": 98.45, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4090", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 153.0, "tps": 19.59, "params_b": 8.03, "accel": "Apple M5 4P+6E+10GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 546.0, "tps": 47.7, "params_b": 8.03, "accel": "Apple M4 Max 10P+4E+32GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 192.0, "tps": 31.69, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 192.0, "tps": 132.33, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 384.0, "tps": 12.43, "params_b": 1.5, "accel": "AMD Radeon RX 6700 XT", "model": "Llama 3.2 1B Instruct"}, {"bw": 192.0, "tps": 168.71, "params_b": 0.37, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Llm 350m Instruct v2"}, {"bw": 307.0, "tps": 174.33, "params_b": 1.5, "accel": "Apple M5 Pro 6P+12E+20GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 307.0, "tps": 44.53, "params_b": 8.03, "accel": "Apple M5 Pro 6P+12E+20GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 307.0, "tps": 24.27, "params_b": 14.77, "accel": "Apple M5 Pro 6P+12E+20GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 288.0, "tps": 20.03, "params_b": 14.77, "accel": "NVIDIA RTX A2000 12GB", "model": "Qwen2.5 14B Instruct"}, {"bw": 288.0, "tps": 35.73, "params_b": 8.03, "accel": "NVIDIA RTX A2000 12GB", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 288.0, "tps": 154.91, "params_b": 1.5, "accel": "NVIDIA RTX A2000 12GB", "model": "Llama 3.2 1B Instruct"}, {"bw": 819.0, "tps": 35.91, "params_b": 14.77, "accel": "Apple M3 Ultra 24P+8E+80GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 68.0, "tps": 38.68, "params_b": 1.5, "accel": "Apple M1 4P+4E+8GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 68.0, "tps": 8.88, "params_b": 8.03, "accel": "Apple M1 4P+4E+8GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 192.0, "tps": 91.01, "params_b": 2.03, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Qwen3 1.7B"}, {"bw": 224.0, "tps": 29.51, "params_b": 8.03, "accel": "NVIDIA RTX 2000 Ada Generation Laptop GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 546.0, "tps": 48.62, "params_b": 8.03, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 192.0, "tps": 19.54, "params_b": 1.5, "accel": "NVIDIA GeForce GTX 1650 Ti with Max-Q Design", "model": "Llama 3.2 1B Instruct"}, {"bw": 360.0, "tps": 9.51, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3060", "model": "Qwen2.5 14B Instruct"}, {"bw": 546.0, "tps": 28.5, "params_b": 14.77, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 120.0, "tps": 9.5, "params_b": 14.77, "accel": "Apple M4 4P+6E+10GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 100.0, "tps": 66.81, "params_b": 1.5, "accel": "Apple M3 4P+4E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 89.0, "tps": 5.63, "params_b": 8.03, "accel": "AMD Radeon 780M Graphics", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 89.0, "tps": 3.05, "params_b": 14.77, "accel": "AMD Radeon 780M Graphics", "model": "Qwen2.5 14B Instruct"}, {"bw": 307.0, "tps": 24.0, "params_b": 14.77, "accel": "Apple M5 Pro 6P+12E+20GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 1008.0, "tps": 69.62, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4090", "model": "Qwen2.5 14B Instruct"}, {"bw": 192.0, "tps": 94.12, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 273.0, "tps": 117.88, "params_b": 1.5, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 336.0, "tps": 35.33, "params_b": 1.5, "accel": "NVIDIA GeForce GTX 1660 SUPER", "model": "Llama 3.2 1B Instruct"}, {"bw": 336.0, "tps": 35.34, "params_b": 1.5, "accel": "NVIDIA GeForce GTX 1660 SUPER", "model": "Llama 3.2 1B Instruct"}, {"bw": 336.0, "tps": 35.48, "params_b": 1.5, "accel": "NVIDIA GeForce GTX 1660 SUPER", "model": "Llama 3.2 1B Instruct"}, {"bw": 336.0, "tps": 35.2, "params_b": 1.5, "accel": "NVIDIA GeForce GTX 1660 SUPER", "model": "Llama 3.2 1B Instruct"}, {"bw": 546.0, "tps": 58.81, "params_b": 8.03, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 546.0, "tps": 191.24, "params_b": 1.5, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 672.0, "tps": 49.06, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4070 Ti SUPER", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 672.0, "tps": 48.83, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4070 Ti SUPER", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 672.0, "tps": 137.0, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4070 Ti SUPER", "model": "Llama 3.2 1B Instruct"}, {"bw": 672.0, "tps": 49.13, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4070 Ti SUPER", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 100.0, "tps": 4.32, "params_b": 14.77, "accel": "Apple M3 4P+4E+10GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 150.0, "tps": 22.03, "params_b": 8.03, "accel": "Apple M3 Pro 5P+6E+14GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 150.0, "tps": 88.68, "params_b": 1.5, "accel": "Apple M3 Pro 5P+6E+14GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 256.0, "tps": 192.12, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4060 Laptop GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 614.0, "tps": 33.78, "params_b": 14.77, "accel": "Apple M5 Max 6P+12E+32GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 614.0, "tps": 62.22, "params_b": 8.03, "accel": "Apple M5 Max 6P+12E+32GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 307.0, "tps": 44.9, "params_b": 8.03, "accel": "Apple M5 Pro 6P+12E+20GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 307.0, "tps": 24.2, "params_b": 14.77, "accel": "Apple M5 Pro 6P+12E+20GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 614.0, "tps": 230.16, "params_b": 1.5, "accel": "Apple M5 Max 6P+12E+32GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 307.0, "tps": 176.0, "params_b": 1.5, "accel": "Apple M5 Pro 6P+12E+20GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 89.0, "tps": 3.5, "params_b": 8.03, "accel": "AMD Radeon 780M Graphics", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 346.0, "tps": 11.47, "params_b": 14.77, "accel": "Tesla P40", "model": "Qwen2.5 14B Instruct"}, {"bw": 200.0, "tps": 13.34, "params_b": 14.77, "accel": "Apple M2 Pro 6P+4E+16GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 200.0, "tps": 13.31, "params_b": 14.77, "accel": "Apple M2 Pro 6P+4E+16GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 120.0, "tps": 78.11, "params_b": 1.5, "accel": "Apple M4 4P+6E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 68.0, "tps": 22.07, "params_b": 1.5, "accel": "Apple M1 4P+4E+8GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 346.0, "tps": 117.52, "params_b": 1.5, "accel": "Tesla P40", "model": "Llama 3.2 1B Instruct"}, {"bw": 153.0, "tps": 97.8, "params_b": 1.5, "accel": "Apple M5 4P+6E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 346.0, "tps": 63.4, "params_b": 1.5, "accel": "Tesla P40", "model": "Llama 3.2 1B Instruct"}, {"bw": 912.0, "tps": 81.32, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3080 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 912.0, "tps": 211.66, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3080 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 912.0, "tps": 49.96, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3080 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 672.0, "tps": 47.94, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4070 Ti SUPER", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 672.0, "tps": 47.96, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4070 Ti SUPER", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 400.0, "tps": 147.72, "params_b": 1.5, "accel": "Apple M2 Max 8P+4E+38GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 614.0, "tps": 36.31, "params_b": 14.77, "accel": "Apple M5 Max 6P+12E+40GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 288.0, "tps": 176.36, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4060 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 288.0, "tps": 45.36, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4060 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 288.0, "tps": 45.38, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4060 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 288.0, "tps": 176.03, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4060 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 288.0, "tps": 174.21, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4060 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 546.0, "tps": 26.73, "params_b": 14.77, "accel": "Apple M4 Max 10P+4E+32GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 273.0, "tps": 30.92, "params_b": 8.03, "accel": "Apple M4 Pro 8P+4E+16GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 273.0, "tps": 16.93, "params_b": 14.77, "accel": "Apple M4 Pro 8P+4E+16GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 120.0, "tps": 18.1, "params_b": 8.03, "accel": "Apple M4 4P+6E+10GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 120.0, "tps": 78.37, "params_b": 1.5, "accel": "Apple M4 4P+6E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 100.0, "tps": 21.63, "params_b": 8.03, "accel": "Apple M2 4P+4E+10GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 100.0, "tps": 59.96, "params_b": 1.5, "accel": "Apple M2 4P+4E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 273.0, "tps": 16.51, "params_b": 14.77, "accel": "Apple M4 Pro 8P+4E+16GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 546.0, "tps": 30.37, "params_b": 14.77, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 100.0, "tps": 13.7, "params_b": 8.03, "accel": "Apple M3 4P+4E+8GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 100.0, "tps": 60.31, "params_b": 1.5, "accel": "Apple M3 4P+4E+8GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 896.0, "tps": 253.98, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5070 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 512.0, "tps": 39.19, "params_b": 8.03, "accel": "AMD Radeon RX 6800 XT", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 672.0, "tps": 129.12, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4070 Ti SUPER", "model": "Llama 3.2 1B Instruct"}, {"bw": 546.0, "tps": 180.2, "params_b": 1.5, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 546.0, "tps": 41.78, "params_b": 8.03, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 546.0, "tps": 18.65, "params_b": 14.77, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 614.0, "tps": 61.63, "params_b": 8.03, "accel": "Apple M5 Max 6P+12E+32GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 614.0, "tps": 228.99, "params_b": 1.5, "accel": "Apple M5 Max 6P+12E+32GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 614.0, "tps": 34.29, "params_b": 14.77, "accel": "Apple M5 Max 6P+12E+32GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 672.0, "tps": 128.73, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4070 Ti SUPER", "model": "Llama 3.2 1B Instruct"}, {"bw": 360.0, "tps": 32.78, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3060", "model": "Qwen2.5 14B Instruct"}, {"bw": 360.0, "tps": 57.5, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3060", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 360.0, "tps": 52.79, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3060", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 360.0, "tps": 52.99, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3060", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 360.0, "tps": 52.23, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3060", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 672.0, "tps": 48.13, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4070 Ti SUPER", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 320.0, "tps": 75.43, "params_b": 1.5, "accel": "NVIDIA GeForce GTX 1080", "model": "Llama 3.2 1B Instruct"}, {"bw": 672.0, "tps": 129.31, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4070 Ti SUPER", "model": "Llama 3.2 1B Instruct"}, {"bw": 672.0, "tps": 129.43, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4070 Ti SUPER", "model": "Llama 3.2 1B Instruct"}, {"bw": 672.0, "tps": 48.31, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4070 Ti SUPER", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 672.0, "tps": 48.21, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4070 Ti SUPER", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 672.0, "tps": 48.15, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4070 Ti SUPER", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 672.0, "tps": 26.72, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4070 Ti SUPER", "model": "Qwen2.5 14B Instruct"}, {"bw": 717.0, "tps": 48.89, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4080", "model": "Qwen2.5 14B Instruct"}, {"bw": 717.0, "tps": 48.24, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4080", "model": "Qwen2.5 14B Instruct"}, {"bw": 400.0, "tps": 82.98, "params_b": 1.5, "accel": "Apple M1 Max 8P+2E+24GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 960.0, "tps": 28.94, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 5080", "model": "Qwen2.5 14B Instruct"}, {"bw": 912.0, "tps": 36.77, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3080 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 546.0, "tps": 48.71, "params_b": 8.03, "accel": "Apple M4 Max 10P+4E+32GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 546.0, "tps": 48.32, "params_b": 8.03, "accel": "Apple M4 Max 10P+4E+32GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 256.0, "tps": 43.35, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4060 Laptop GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 448.0, "tps": 16.64, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 5060 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 360.0, "tps": 29.51, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3060", "model": "Qwen2.5 14B Instruct"}, {"bw": 360.0, "tps": 52.4, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3060", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 360.0, "tps": 211.24, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3060", "model": "Llama 3.2 1B Instruct"}, {"bw": 273.0, "tps": 111.94, "params_b": 1.5, "accel": "Apple M4 Pro 8P+4E+16GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 624.0, "tps": 57.11, "params_b": 3.61, "accel": "AMD Radeon RX 7800 XT", "model": "Llama 3.2 3B Instruct"}, {"bw": 624.0, "tps": 44.09, "params_b": 7.62, "accel": "AMD Radeon RX 7800 XT", "model": "Qwen2.5 7B Instruct"}, {"bw": 624.0, "tps": 46.81, "params_b": 6.74, "accel": "AMD Radeon RX 7800 XT", "model": "CodeLlama 7b Instruct Hf"}, {"bw": 624.0, "tps": 24.08, "params_b": 14.77, "accel": "AMD Radeon RX 7800 XT", "model": "Qwen3 14B"}, {"bw": 624.0, "tps": 22.81, "params_b": 12.77, "accel": "AMD Radeon RX 7800 XT", "model": "Gemma 3 12b It Heretic v2"}, {"bw": 100.0, "tps": 48.3, "params_b": 1.5, "accel": "Apple M2 4P+4E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 400.0, "tps": 18.75, "params_b": 14.77, "accel": "Apple M2 Max 8P+4E+38GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 112.0, "tps": 46.7, "params_b": 1.5, "accel": "NVIDIA GeForce GTX 1050 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 672.0, "tps": 36.66, "params_b": 14.77, "accel": "Quadro RTX 8000", "model": "Qwen2.5 14B Instruct"}, {"bw": 672.0, "tps": 218.08, "params_b": 1.5, "accel": "Quadro RTX 8000", "model": "Llama 3.2 1B Instruct"}, {"bw": 100.0, "tps": 51.4, "params_b": 14.77, "accel": "Tesla V100-SXM2-32GB", "model": "Qwen2.5 14B Instruct"}, {"bw": 100.0, "tps": 87.97, "params_b": 8.03, "accel": "Tesla V100-SXM2-32GB", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 100.0, "tps": 272.33, "params_b": 1.5, "accel": "Tesla V100-SXM2-32GB", "model": "Llama 3.2 1B Instruct"}, {"bw": 89.0, "tps": 1.94, "params_b": 14.77, "accel": "AMD Radeon 780M Graphics", "model": "Qwen2.5 14B Instruct"}, {"bw": 192.0, "tps": 282.42, "params_b": 0.11, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Rain"}, {"bw": 192.0, "tps": 289.92, "params_b": 0.12, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Jeeves Small 100M"}, {"bw": 192.0, "tps": 126.68, "params_b": 0.16, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "OpenCLAW SEED 135M"}, {"bw": 100.0, "tps": 43.53, "params_b": 1.5, "accel": "Apple M2 4P+4E+8GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 200.0, "tps": 66.84, "params_b": 1.5, "accel": "Apple M1 Pro 8P+2E+16GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 336.0, "tps": 209.96, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3060 Laptop GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 33.15, "params_b": 14.77, "accel": "NVIDIA RTX A4000", "model": "Qwen2.5 14B Instruct"}, {"bw": 448.0, "tps": 59.03, "params_b": 8.03, "accel": "NVIDIA RTX A4000", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 512.0, "tps": 96.09, "params_b": 1.5, "accel": "AMD Radeon RX 6800 XT", "model": "Llama 3.2 1B Instruct"}, {"bw": 192.0, "tps": 102.37, "params_b": 1.51, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Titulm Mpt 1b v2.0"}, {"bw": 192.0, "tps": 1419.66, "params_b": 0.01, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Tiny Mpt Random Remote Code"}, {"bw": 192.0, "tps": 807.49, "params_b": 0.01, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Tiny Lm 8M"}, {"bw": 192.0, "tps": 362.6, "params_b": 0.02, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Stentor 12M Instruct"}, {"bw": 192.0, "tps": 92.89, "params_b": 1.59, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Refact 1_6B Fim"}, {"bw": 192.0, "tps": 146.47, "params_b": 0.06, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "SmollerLM-48M-Instruct-ft-sft"}, {"bw": 192.0, "tps": 219.76, "params_b": 0.31, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "OpenELM 270M Instruct"}, {"bw": 192.0, "tps": 704.05, "params_b": 0.09, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "NexTrim 50M"}, {"bw": 192.0, "tps": 475.51, "params_b": 0.06, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "mistral-60m"}, {"bw": 192.0, "tps": 1528.63, "params_b": 0.04, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "llama-39m"}, {"bw": 192.0, "tps": 73.14, "params_b": 2.32, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Kakaocorp.Kanana 1.5 2.1b Instruct 2505"}, {"bw": 448.0, "tps": 230.68, "params_b": 1.5, "accel": "NVIDIA RTX A4000", "model": "Llama 3.2 1B Instruct"}, {"bw": 192.0, "tps": 525.27, "params_b": 0.07, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "DAC60M"}, {"bw": 273.0, "tps": 118.41, "params_b": 1.5, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 68.0, "tps": 4.82, "params_b": 14.77, "accel": "Apple M1 4P+4E+7GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 68.0, "tps": 9.69, "params_b": 8.03, "accel": "Apple M1 4P+4E+7GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 68.0, "tps": 38.52, "params_b": 1.5, "accel": "Apple M1 4P+4E+7GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 400.0, "tps": 23.35, "params_b": 14.77, "accel": "Apple M2 Max 8P+4E+38GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 672.0, "tps": 29.64, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4070 Ti SUPER", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 100.0, "tps": 15.6, "params_b": 8.03, "accel": "Apple M3 4P+4E+10GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 100.0, "tps": 11.83, "params_b": 8.03, "accel": "Apple M3 4P+4E", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 512.0, "tps": 22.33, "params_b": 14.77, "accel": "AMD Radeon RX 6800 XT", "model": "Qwen2.5 14B Instruct"}, {"bw": 512.0, "tps": 38.22, "params_b": 8.03, "accel": "AMD Radeon RX 6800 XT", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 512.0, "tps": 117.88, "params_b": 1.5, "accel": "AMD Radeon RX 6800 XT", "model": "Llama 3.2 1B Instruct"}, {"bw": 273.0, "tps": 30.5, "params_b": 8.03, "accel": "Apple M4 Pro 8P+4E+16GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 672.0, "tps": 133.1, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4070 Ti SUPER", "model": "Llama 3.2 1B Instruct"}, {"bw": 672.0, "tps": 48.76, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4070 Ti SUPER", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 672.0, "tps": 26.6, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4070 Ti SUPER", "model": "Qwen2.5 14B Instruct"}, {"bw": 760.0, "tps": 79.03, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3080", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 760.0, "tps": 228.35, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3080", "model": "Llama 3.2 1B Instruct"}, {"bw": 760.0, "tps": 234.6, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3080", "model": "Llama 3.2 1B Instruct"}, {"bw": 760.0, "tps": 80.31, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3080", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 736.0, "tps": 80.1, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4080 SUPER", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 760.0, "tps": 74.85, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3080", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 736.0, "tps": 77.28, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4080 SUPER", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 760.0, "tps": 233.44, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3080", "model": "Llama 3.2 1B Instruct"}, {"bw": 736.0, "tps": 247.22, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4080 SUPER", "model": "Llama 3.2 1B Instruct"}, {"bw": 546.0, "tps": 180.38, "params_b": 1.5, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 736.0, "tps": 46.15, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4080 SUPER", "model": "Qwen2.5 14B Instruct"}, {"bw": 760.0, "tps": 46.62, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3080", "model": "Qwen2.5 14B Instruct"}, {"bw": 546.0, "tps": 51.23, "params_b": 8.03, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 546.0, "tps": 28.13, "params_b": 14.77, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 89.0, "tps": 12.05, "params_b": 1.5, "accel": "AMD Radeon 780M Graphics", "model": "Llama 3.2 1B Instruct"}, {"bw": 288.0, "tps": 23.36, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4060 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 150.0, "tps": 11.81, "params_b": 14.77, "accel": "Apple M3 Pro 5P+6E+14GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 200.0, "tps": 72.49, "params_b": 1.5, "accel": "Apple M1 Pro 8P+2E+16GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 120.0, "tps": 8.51, "params_b": 14.77, "accel": "Apple M4 4P+6E+10GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 400.0, "tps": 123.31, "params_b": 1.5, "accel": "Apple M2 Max 8P+4E+30GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 288.0, "tps": 11.94, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4060 Ti", "model": "Qwen3 14B"}, {"bw": 273.0, "tps": 117.42, "params_b": 1.5, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 273.0, "tps": 117.5, "params_b": 1.5, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 400.0, "tps": 21.06, "params_b": 14.77, "accel": "Apple M1 Max 8P+2E+32GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 153.0, "tps": 100.51, "params_b": 1.5, "accel": "Apple M5 4P+6E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 400.0, "tps": 123.34, "params_b": 1.5, "accel": "Apple M1 Max 8P+2E+32GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 153.0, "tps": 22.37, "params_b": 8.03, "accel": "Apple M5 4P+6E+10GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 400.0, "tps": 37.7, "params_b": 8.03, "accel": "Apple M1 Max 8P+2E+32GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 153.0, "tps": 22.31, "params_b": 8.03, "accel": "Apple M5 4P+6E+10GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 400.0, "tps": 20.88, "params_b": 14.77, "accel": "Apple M1 Max 8P+2E+32GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 153.0, "tps": 22.2, "params_b": 8.03, "accel": "Apple M5 4P+6E+10GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 153.0, "tps": 11.95, "params_b": 14.77, "accel": "Apple M5 4P+6E+10GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 400.0, "tps": 38.08, "params_b": 8.03, "accel": "Apple M1 Max 8P+2E+32GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 273.0, "tps": 18.01, "params_b": 14.77, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 273.0, "tps": 32.79, "params_b": 8.03, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 273.0, "tps": 118.34, "params_b": 1.5, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 1792.0, "tps": 23.16, "params_b": 14.77, "accel": "NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation ", "model": "Qwen2.5 14B Instruct"}, {"bw": 400.0, "tps": 37.53, "params_b": 8.03, "accel": "Apple M1 Max 8P+2E+32GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 400.0, "tps": 20.42, "params_b": 14.77, "accel": "Apple M1 Max 8P+2E+32GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 153.0, "tps": 96.23, "params_b": 1.5, "accel": "Apple M5 4P+6E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 153.0, "tps": 11.12, "params_b": 14.77, "accel": "Apple M5 4P+6E+10GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 153.0, "tps": 99.09, "params_b": 1.5, "accel": "Apple M5 4P+6E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 68.0, "tps": 14.55, "params_b": 8.03, "accel": "Apple M1 4P+4E+8GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 68.0, "tps": 41.43, "params_b": 1.5, "accel": "Apple M1 4P+4E+8GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 1792.0, "tps": 23.13, "params_b": 14.77, "accel": "NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation ", "model": "Qwen2.5 14B Instruct"}, {"bw": 546.0, "tps": 26.87, "params_b": 14.77, "accel": "Apple M4 Max 10P+4E+32GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 400.0, "tps": 135.01, "params_b": 1.5, "accel": "Apple M2 Max 8P+4E+30GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 546.0, "tps": 167.21, "params_b": 1.5, "accel": "Apple M4 Max 10P+4E+32GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 546.0, "tps": 48.72, "params_b": 8.03, "accel": "Apple M4 Max 10P+4E+32GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 120.0, "tps": 66.3, "params_b": 1.5, "accel": "Apple M4 4P+6E+8GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 120.0, "tps": 15.27, "params_b": 8.03, "accel": "Apple M4 4P+6E+8GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 192.0, "tps": 114.61, "params_b": 0.75, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Qwen3 0.6B"}, {"bw": 192.0, "tps": 186.87, "params_b": 0.31, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "OpenELM 270M"}, {"bw": 192.0, "tps": 229.82, "params_b": 0.38, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Jais Family 256m Chat"}, {"bw": 192.0, "tps": 628.25, "params_b": 0.07, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "GPTNeoX Spanish_Poet 70m"}, {"bw": 192.0, "tps": 301.31, "params_b": 0.16, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Gpt2"}, {"bw": 192.0, "tps": 173.43, "params_b": 0.51, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "h2o-danube3-500m-chat"}, {"bw": 192.0, "tps": 189.15, "params_b": 0.51, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "h2o-danube3-500m-chat"}, {"bw": 192.0, "tps": 201.53, "params_b": 0.51, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "h2o-danube3-500m-chat"}, {"bw": 192.0, "tps": 199.89, "params_b": 0.51, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "h2o-danube3-500m-chat"}, {"bw": 192.0, "tps": 195.73, "params_b": 0.51, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "h2o-danube3-500m-chat"}, {"bw": 192.0, "tps": 204.41, "params_b": 0.51, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "h2o-danube3-500m-chat"}, {"bw": 192.0, "tps": 202.93, "params_b": 0.51, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "h2o-danube3-500m-chat"}, {"bw": 192.0, "tps": 197.84, "params_b": 0.51, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "h2o-danube3-500m-chat"}, {"bw": 192.0, "tps": 205.75, "params_b": 0.51, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "h2o-danube3-500m-chat"}, {"bw": 192.0, "tps": 204.2, "params_b": 0.51, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "h2o-danube3-500m-chat"}, {"bw": 192.0, "tps": 204.05, "params_b": 0.51, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "h2o-danube3-500m-chat"}, {"bw": 192.0, "tps": 200.92, "params_b": 0.51, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "h2o-danube3-500m-chat"}, {"bw": 192.0, "tps": 196.21, "params_b": 0.51, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "h2o-danube3-500m-chat"}, {"bw": 192.0, "tps": 210.13, "params_b": 0.51, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "h2o-danube3-500m-chat"}, {"bw": 192.0, "tps": 210.97, "params_b": 0.51, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "h2o-danube3-500m-chat"}, {"bw": 192.0, "tps": 175.02, "params_b": 0.51, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "h2o-danube3-500m-chat"}, {"bw": 192.0, "tps": 28.42, "params_b": 0.51, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "h2o-danube3-500m-chat"}, {"bw": 192.0, "tps": 26.29, "params_b": 0.51, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "h2o-danube3-500m-chat"}, {"bw": 192.0, "tps": 26.32, "params_b": 0.51, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "h2o-danube3-500m-chat"}, {"bw": 192.0, "tps": 24.94, "params_b": 0.51, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "h2o-danube3-500m-chat"}, {"bw": 192.0, "tps": 30.26, "params_b": 0.51, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "h2o-danube3-500m-chat"}, {"bw": 273.0, "tps": 123.49, "params_b": 1.5, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 200.0, "tps": 23.62, "params_b": 8.03, "accel": "Apple M2 Pro 6P+4E+16GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 192.0, "tps": 28.87, "params_b": 0.51, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "h2o-danube3-500m-chat"}, {"bw": 192.0, "tps": 34.93, "params_b": 0.51, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "h2o-danube3-500m-chat"}, {"bw": 192.0, "tps": 30.1, "params_b": 0.51, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "h2o-danube3-500m-chat"}, {"bw": 192.0, "tps": 30.51, "params_b": 0.51, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "h2o-danube3-500m-chat"}, {"bw": 192.0, "tps": 38.64, "params_b": 0.51, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "h2o-danube3-500m-chat"}, {"bw": 192.0, "tps": 30.42, "params_b": 0.51, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "h2o-danube3-500m-chat"}, {"bw": 192.0, "tps": 80.25, "params_b": 0.51, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "h2o-danube3-500m-chat"}, {"bw": 192.0, "tps": 135.63, "params_b": 0.51, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "h2o-danube3-500m-chat"}, {"bw": 192.0, "tps": 47.78, "params_b": 0.51, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "h2o-danube3-500m-chat"}, {"bw": 192.0, "tps": 45.79, "params_b": 4.41, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Rio 3.0 Open Mini"}, {"bw": 192.0, "tps": 45.33, "params_b": 4.41, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Qwen3 4B Thinking 2507"}, {"bw": 192.0, "tps": 106.39, "params_b": 0.75, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "MinerU HTML"}, {"bw": 192.0, "tps": 143.12, "params_b": 0.44, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "DMindAI.DMind 3 Nano"}, {"bw": 192.0, "tps": 138.9, "params_b": 0.44, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Distil Home Assistant Functiongemma"}, {"bw": 192.0, "tps": 90.32, "params_b": 1.78, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "DeepSeek R1 Distill Qwen 1.5B"}, {"bw": 192.0, "tps": 29.71, "params_b": 8.19, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "DeepSeek R1 0528 Qwen3 8B"}, {"bw": 192.0, "tps": 105.85, "params_b": 0.75, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "DeepBrainz R1 0.6B v2"}, {"bw": 192.0, "tps": 137.84, "params_b": 0.44, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Gemma 3 270m It"}, {"bw": 192.0, "tps": 86.99, "params_b": 1.3, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Gemma 3 1b It"}, {"bw": 192.0, "tps": 45.46, "params_b": 4.55, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Gemma 3 4b It"}, {"bw": 273.0, "tps": 34.17, "params_b": 8.03, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 896.0, "tps": 27.62, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 5070 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 273.0, "tps": 18.64, "params_b": 14.77, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 896.0, "tps": 122.83, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5070 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 896.0, "tps": 48.65, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 5070 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 896.0, "tps": 26.94, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 5070 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 448.0, "tps": 31.34, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 5060 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 448.0, "tps": 190.67, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5060 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 896.0, "tps": 23.2, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 5070 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 896.0, "tps": 41.89, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 5070 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 896.0, "tps": 105.75, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5070 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 1792.0, "tps": 62.32, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 5090", "model": "Qwen2.5 14B Instruct"}, {"bw": 1792.0, "tps": 214.19, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5090", "model": "Llama 3.2 1B Instruct"}, {"bw": 100.0, "tps": 11.13, "params_b": 8.03, "accel": "Apple M3 4P+4E+10GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 100.0, "tps": 66.31, "params_b": 1.5, "accel": "Apple M3 4P+4E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 192.0, "tps": 35.44, "params_b": 0.16, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "SmolLM2 135M Instruct"}, {"bw": 192.0, "tps": 35.02, "params_b": 0.16, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "SmolLM2 135M Instruct"}, {"bw": 192.0, "tps": 151.6, "params_b": 0.16, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "SmolLM2 135M Instruct"}, {"bw": 192.0, "tps": 509.26, "params_b": 0.1, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Minueza 2 96M Instruct Variant 03"}, {"bw": 192.0, "tps": 502.35, "params_b": 0.1, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Minueza 2 96M Instruct Variant 03"}, {"bw": 192.0, "tps": 502.33, "params_b": 0.1, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Minueza 2 96M Instruct Variant 03"}, {"bw": 192.0, "tps": 517.05, "params_b": 0.1, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Minueza 2 96M Instruct Variant 03"}, {"bw": 192.0, "tps": 520.86, "params_b": 0.1, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Minueza 2 96M Instruct Variant 03"}, {"bw": 192.0, "tps": 517.0, "params_b": 0.1, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Minueza 2 96M Instruct Variant 03"}, {"bw": 192.0, "tps": 513.23, "params_b": 0.1, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Minueza 2 96M Instruct Variant 03"}, {"bw": 192.0, "tps": 510.15, "params_b": 0.1, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Minueza 2 96M Instruct Variant 03"}, {"bw": 192.0, "tps": 521.72, "params_b": 0.1, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Minueza 2 96M Instruct Variant 03"}, {"bw": 192.0, "tps": 125.29, "params_b": 0.1, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Minueza 2 96M Instruct Variant 03"}, {"bw": 192.0, "tps": 174.92, "params_b": 0.1, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Minueza 2 96M Instruct Variant 03"}, {"bw": 192.0, "tps": 171.99, "params_b": 0.1, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Minueza 2 96M Instruct Variant 03"}, {"bw": 192.0, "tps": 123.4, "params_b": 0.1, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Minueza 2 96M Instruct Variant 03"}, {"bw": 192.0, "tps": 125.36, "params_b": 0.1, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Minueza 2 96M Instruct Variant 03"}, {"bw": 192.0, "tps": 128.35, "params_b": 0.1, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Minueza 2 96M Instruct Variant 03"}, {"bw": 192.0, "tps": 239.07, "params_b": 0.1, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Minueza 2 96M Instruct Variant 03"}, {"bw": 192.0, "tps": 846.11, "params_b": 0.0, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "llama"}, {"bw": 192.0, "tps": 844.33, "params_b": 0.0, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "llama"}, {"bw": 192.0, "tps": 279.25, "params_b": 0.0, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "llama"}, {"bw": 192.0, "tps": 505.22, "params_b": 0.1, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Minueza 2 96M Instruct Variant 04"}, {"bw": 192.0, "tps": 504.05, "params_b": 0.1, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Minueza 2 96M Instruct Variant 02"}, {"bw": 192.0, "tps": 1873.04, "params_b": 0.0, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "TinyStories 656K"}, {"bw": 192.0, "tps": 1889.62, "params_b": 0.0, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "TinyStories 656K"}, {"bw": 192.0, "tps": 1869.43, "params_b": 0.0, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "TinyStories 656K"}, {"bw": 192.0, "tps": 1878.84, "params_b": 0.0, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "TinyStories 656K"}, {"bw": 192.0, "tps": 1890.19, "params_b": 0.0, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "TinyStories 656K"}, {"bw": 192.0, "tps": 1855.74, "params_b": 0.0, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "TinyStories 656K"}, {"bw": 192.0, "tps": 1884.73, "params_b": 0.0, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "TinyStories 656K"}, {"bw": 192.0, "tps": 1890.29, "params_b": 0.0, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "TinyStories 656K"}, {"bw": 192.0, "tps": 1905.21, "params_b": 0.0, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "TinyStories 656K"}, {"bw": 192.0, "tps": 1887.03, "params_b": 0.0, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "TinyStories 656K"}, {"bw": 192.0, "tps": 1859.13, "params_b": 0.0, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "TinyStories 656K"}, {"bw": 192.0, "tps": 557.3, "params_b": 0.0, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "TinyStories 656K"}, {"bw": 192.0, "tps": 854.09, "params_b": 0.0, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "TinyStories 656K"}, {"bw": 192.0, "tps": 865.34, "params_b": 0.0, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "TinyStories 656K"}, {"bw": 192.0, "tps": 897.87, "params_b": 0.0, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "TinyStories 656K"}, {"bw": 192.0, "tps": 645.84, "params_b": 0.0, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "TinyStories 656K"}, {"bw": 192.0, "tps": 624.83, "params_b": 0.0, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "TinyStories 656K"}, {"bw": 192.0, "tps": 651.45, "params_b": 0.0, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "TinyStories 656K"}, {"bw": 192.0, "tps": 616.61, "params_b": 0.0, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "TinyStories 656K"}, {"bw": 192.0, "tps": 616.87, "params_b": 0.0, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "TinyStories 656K"}, {"bw": 192.0, "tps": 728.31, "params_b": 0.0, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "TinyStories 656K"}, {"bw": 192.0, "tps": 1823.06, "params_b": 0.0, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Raincandy U TinyStories 656K"}, {"bw": 192.0, "tps": 1885.42, "params_b": 0.0, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Raincandy U TinyStories 656K"}, {"bw": 192.0, "tps": 640.43, "params_b": 0.0, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Raincandy U TinyStories 656K"}, {"bw": 936.0, "tps": 60.64, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen2.5 14B Instruct"}, {"bw": 200.0, "tps": 81.06, "params_b": 1.5, "accel": "Apple M1 Pro 8P+2E+16GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 273.0, "tps": 18.14, "params_b": 14.77, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 400.0, "tps": 20.91, "params_b": 14.77, "accel": "Apple M1 Max 8P+2E+32GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 448.0, "tps": 63.84, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3060 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 448.0, "tps": 178.43, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3060 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 273.0, "tps": 32.64, "params_b": 8.03, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 273.0, "tps": 118.83, "params_b": 1.5, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 400.0, "tps": 124.32, "params_b": 1.5, "accel": "Apple M1 Max 8P+2E+32GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 273.0, "tps": 118.96, "params_b": 1.5, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 400.0, "tps": 38.4, "params_b": 8.03, "accel": "Apple M1 Max 8P+2E+32GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 400.0, "tps": 128.11, "params_b": 1.5, "accel": "Apple M1 Max 8P+2E+32GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 273.0, "tps": 120.23, "params_b": 1.5, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 1008.0, "tps": 48.05, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4090", "model": "Qwen2.5 14B Instruct"}, {"bw": 1008.0, "tps": 80.49, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4090", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 1008.0, "tps": 163.9, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4090", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 59.21, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3060 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 448.0, "tps": 195.38, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3060 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 192.0, "tps": 459.07, "params_b": 0.1, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Minueza 2 96M Instruct Variant 03"}, {"bw": 192.0, "tps": 477.59, "params_b": 0.1, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Minueza 2 96M Instruct Variant 03"}, {"bw": 192.0, "tps": 118.6, "params_b": 0.75, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "LightOnOCR 2 1B"}, {"bw": 192.0, "tps": 92.85, "params_b": 1.78, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "ordis_v355_VarGH_merged"}, {"bw": 192.0, "tps": 56.67, "params_b": 3.93, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Nanbeige4.1 3B"}, {"bw": 546.0, "tps": 180.0, "params_b": 1.5, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 400.0, "tps": 16.78, "params_b": 14.77, "accel": "Apple M1 Max 8P+2E+32GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 400.0, "tps": 110.41, "params_b": 1.5, "accel": "Apple M1 Max 8P+2E+32GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 1792.0, "tps": 55.64, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 5090", "model": "Qwen2.5 14B Instruct"}, {"bw": 150.0, "tps": 11.88, "params_b": 14.77, "accel": "Apple M3 Pro 5P+6E+14GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 68.0, "tps": 35.98, "params_b": 1.5, "accel": "Apple M1 4P+4E+7GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 6.16, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3070", "model": "Qwen2.5 14B Instruct"}, {"bw": 448.0, "tps": 204.01, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3070", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 62.71, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3070", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 448.0, "tps": 63.42, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3060 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 448.0, "tps": 242.45, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3060 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 192.0, "tps": 120.84, "params_b": 0.41, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "SmolVLM2 500M Video Instruct"}, {"bw": 192.0, "tps": 38.11, "params_b": 0.42, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "State Spaces Mamba 370m Hf"}, {"bw": 192.0, "tps": 447.57, "params_b": 0.1, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Pico-OpenLAiNN-100M"}, {"bw": 192.0, "tps": 155.03, "params_b": 0.62, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Sailor-0.5B-Chat"}, {"bw": 192.0, "tps": 68.42, "params_b": 3.11, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Neo 3 3B A400M Base"}, {"bw": 192.0, "tps": 152.53, "params_b": 0.14, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "MobileLLM 125M HF"}, {"bw": 192.0, "tps": 152.77, "params_b": 0.1, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "MobileLLM 80M Finetuned"}, {"bw": 192.0, "tps": 299.28, "params_b": 0.25, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "LocutusqueXFelladrin TinyMistral248M Instruct"}, {"bw": 192.0, "tps": 322.87, "params_b": 0.16, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Pythia 160m Deduped"}, {"bw": 192.0, "tps": 614.97, "params_b": 0.07, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Pythia 70m Deduped"}, {"bw": 192.0, "tps": 279.98, "params_b": 0.32, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "MiniPLM Qwen 200M"}, {"bw": 192.0, "tps": 75.76, "params_b": 0.19, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "MiniPLM Mamba 130M"}, {"bw": 192.0, "tps": 122.05, "params_b": 0.24, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "MTLM1 200M"}, {"bw": 192.0, "tps": 408.86, "params_b": 0.21, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Tiny Mistral"}, {"bw": 192.0, "tps": 285.85, "params_b": 0.25, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "TinyMistral 248M v2.5"}, {"bw": 192.0, "tps": 65.4, "params_b": 3.2, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "TowerVision 2B"}, {"bw": 400.0, "tps": 31.19, "params_b": 8.03, "accel": "Apple M2 Max 8P+4E+30GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 400.0, "tps": 122.55, "params_b": 1.5, "accel": "Apple M2 Max 8P+4E+30GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 400.0, "tps": 17.56, "params_b": 14.77, "accel": "Apple M1 Max 8P+2E+24GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 960.0, "tps": 51.43, "params_b": 8.03, "accel": "AMD Radeon RX 7900 XTX", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 546.0, "tps": 30.46, "params_b": 14.77, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 150.0, "tps": 11.66, "params_b": 14.77, "accel": "Apple M3 Pro 6P+6E+18GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 200.0, "tps": 21.89, "params_b": 8.03, "accel": "Apple M1 Pro 8P+2E+16GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 624.0, "tps": 30.57, "params_b": 8.37, "accel": "AMD Radeon RX 7800 XT", "model": "Granite 3.1 8b Instruct"}, {"bw": 624.0, "tps": 105.13, "params_b": 1.5, "accel": "AMD Radeon RX 7800 XT", "model": "Llama 3.2 1B Instruct"}, {"bw": 624.0, "tps": 112.31, "params_b": 1.5, "accel": "AMD Radeon RX 7800 XT", "model": "Llama 3.2 1B Instruct"}, {"bw": 624.0, "tps": 108.05, "params_b": 1.5, "accel": "AMD Radeon RX 7800 XT", "model": "Llama 3.2 1B Instruct"}, {"bw": 200.0, "tps": 11.88, "params_b": 14.77, "accel": "Apple M1 Pro 8P+2E+16GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 1792.0, "tps": 105.72, "params_b": 1.5, "accel": "NVIDIA RTX PRO 6000 Blackwell Server Edition", "model": "Llama 3.2 1B Instruct"}, {"bw": 192.0, "tps": 49.05, "params_b": 2.74, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "EuroMoE 2.6B A0.6B Instruct Preview"}, {"bw": 192.0, "tps": 59.03, "params_b": 3.18, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Starcoder2 3b"}, {"bw": 192.0, "tps": 46.7, "params_b": 4.55, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Gemma-3-4B-It"}, {"bw": 192.0, "tps": 30.07, "params_b": 8.19, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Nemotron Cascade 8B"}, {"bw": 192.0, "tps": 50.11, "params_b": 4.45, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Phi-4-Mini-Reasoning"}, {"bw": 484.0, "tps": 153.29, "params_b": 1.5, "accel": "NVIDIA GeForce GTX 1080 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 484.0, "tps": 145.23, "params_b": 1.5, "accel": "NVIDIA GeForce GTX 1080 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 896.0, "tps": 117.91, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5070 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 400.0, "tps": 20.73, "params_b": 14.77, "accel": "Apple M1 Max 8P+2E+32GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 192.0, "tps": 50.49, "params_b": 4.45, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Phi 4 Mini Instruct"}, {"bw": 192.0, "tps": 116.32, "params_b": 0.75, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "InternVL3_5 1B"}, {"bw": 192.0, "tps": 166.44, "params_b": 0.44, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Functiongemma-270M-It"}, {"bw": 192.0, "tps": 29.93, "params_b": 8.19, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "KomdigiUB 8B Instruct DTP"}, {"bw": 192.0, "tps": 94.15, "params_b": 0.83, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Nusantara 0.8b Indo Chat"}, {"bw": 192.0, "tps": 78.53, "params_b": 0.99, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Sailor2 1B Chat"}, {"bw": 192.0, "tps": 1374.65, "params_b": 0.07, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "vicuna-68m"}, {"bw": 192.0, "tps": 55.75, "params_b": 3.93, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Nanbeige4 3B Thinking 2511"}, {"bw": 192.0, "tps": 60.01, "params_b": 3.61, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Llama 3.2 3B Instruct"}, {"bw": 192.0, "tps": 672.94, "params_b": 0.02, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Tinyllama 15M"}, {"bw": 192.0, "tps": 527.83, "params_b": 0.0, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "TinyLLama v0"}, {"bw": 192.0, "tps": 642.63, "params_b": 0.01, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Pythia 14m"}, {"bw": 192.0, "tps": 163.33, "params_b": 0.44, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "DogeAI v1.5 Coder"}, {"bw": 192.0, "tps": 259.84, "params_b": 0.01, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Llama 2 Tiny 4kv Heads 16layers Random"}, {"bw": 192.0, "tps": 898.6, "params_b": 0.0, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Bloom"}, {"bw": 192.0, "tps": 468.81, "params_b": 0.51, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Config.Json"}, {"bw": 192.0, "tps": 832.74, "params_b": 0.02, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Tiny Random Internlm2"}, {"bw": 192.0, "tps": 1491.54, "params_b": 0.02, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Tiny_Random_Llama2_New"}, {"bw": 192.0, "tps": 1532.13, "params_b": 0.0, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "C:\\Users\\dn\\Documents\\sharedp\\p\\afriai\\neo\\hf-quanter\\work"}, {"bw": 192.0, "tps": 1492.39, "params_b": 0.01, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Tiny Random Olmo Hf"}, {"bw": 192.0, "tps": 1169.4, "params_b": 0.02, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Tiny Random Orion"}, {"bw": 192.0, "tps": 896.83, "params_b": 0.02, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Tiny Random Qwen3"}, {"bw": 192.0, "tps": 407.33, "params_b": 0.13, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Tiny Random Stablelm 2"}, {"bw": 192.0, "tps": 233.61, "params_b": 0.1, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Qwen2 96M"}, {"bw": 192.0, "tps": 134.46, "params_b": 0.63, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "LamoFast_2.0"}, {"bw": 192.0, "tps": 468.6, "params_b": 0.07, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Lite-Oute-1-65M-Instruct"}, {"bw": 320.0, "tps": 20.22, "params_b": 8.03, "accel": "NVIDIA GeForce GTX 1080", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 192.0, "tps": 696.5, "params_b": 0.01, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Pythia 14m"}, {"bw": 192.0, "tps": 519.34, "params_b": 0.0, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "TinyLLama v0"}, {"bw": 192.0, "tps": 718.46, "params_b": 0.02, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Tinyllama 15M"}, {"bw": 192.0, "tps": 413.93, "params_b": 0.01, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Phi 3 Mini 4k Instruct"}, {"bw": 192.0, "tps": 378.37, "params_b": 0.01, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "AMD Llama 135m"}, {"bw": 192.0, "tps": 378.89, "params_b": 0.01, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "AMD Llama 135m"}, {"bw": 192.0, "tps": 957.74, "params_b": 0.02, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Tiny Random Qwen3"}, {"bw": 192.0, "tps": 1413.83, "params_b": 0.02, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Falcon"}, {"bw": 192.0, "tps": 2031.8, "params_b": 0.0, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Tiny Gpt2"}, {"bw": 192.0, "tps": 1685.54, "params_b": 0.0, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Tiny Random Phi3ForCausalLM"}, {"bw": 192.0, "tps": 848.98, "params_b": 0.02, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Tiny Gemma Test"}, {"bw": 192.0, "tps": 486.14, "params_b": 0.02, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Plyx 15M"}, {"bw": 546.0, "tps": 163.96, "params_b": 1.5, "accel": "Apple M4 Max 10P+4E+32GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 546.0, "tps": 47.45, "params_b": 8.03, "accel": "Apple M4 Max 10P+4E+32GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 192.0, "tps": 3090.78, "params_b": 0.0, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Tiny Llama Ultra Compact"}, {"bw": 192.0, "tps": 2329.95, "params_b": 0.01, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Planck-OpenLAiNN-10M"}, {"bw": 192.0, "tps": 349.33, "params_b": 0.16, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Mamba Mistral Instruct Distill"}, {"bw": 192.0, "tps": 74.23, "params_b": 0.17, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Mamba 130m Hf"}, {"bw": 192.0, "tps": 55.17, "params_b": 0.36, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Bag"}, {"bw": 192.0, "tps": 186.69, "params_b": 0.36, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "OuteTTS 0.1 350M Hf"}, {"bw": 192.0, "tps": 118.24, "params_b": 0.75, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Beck 0.6B"}, {"bw": 192.0, "tps": 146.13, "params_b": 0.16, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "SmolVLM2 256M Video Instruct"}, {"bw": 192.0, "tps": 605.23, "params_b": 0.12, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Distilgpt2 Tiny Conversational"}, {"bw": 192.0, "tps": 79.05, "params_b": 0.97, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Qwen3 Coder 0.6B 0.9B 7 1bR5 Nobs F16"}, {"bw": 192.0, "tps": 86.14, "params_b": 1.78, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "VibeThinker 1.5B"}, {"bw": 192.0, "tps": 81.19, "params_b": 1.91, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "OpenCoder 1.5B Instruct"}, {"bw": 192.0, "tps": 82.11, "params_b": 1.91, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "OpenCoder 1.5B Instruct"}, {"bw": 192.0, "tps": 823.5, "params_b": 0.0, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Experimental Neo TinyStories Korean 800K 20240819"}, {"bw": 192.0, "tps": 113.66, "params_b": 0.75, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "ReasonLite 0.6B"}, {"bw": 192.0, "tps": 75.89, "params_b": 0.06, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Monad"}, {"bw": 192.0, "tps": 117.37, "params_b": 0.75, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "ReasonLite 0.6B Turbo"}, {"bw": 192.0, "tps": 115.22, "params_b": 0.75, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Theta Crucis 0.6B Turbo1"}, {"bw": 192.0, "tps": 78.16, "params_b": 3.91, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Superthoughts Lite v2 MOE Llama3.2"}, {"bw": 192.0, "tps": 45.19, "params_b": 4.41, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "DistilGPT OSS Qwen3 4B"}, {"bw": 192.0, "tps": 42.47, "params_b": 0.75, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Syngen Reasoning 0.6b"}, {"bw": 192.0, "tps": 66.16, "params_b": 2.63, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Fijik1.5 2.6b A380M"}, {"bw": 192.0, "tps": 400.04, "params_b": 0.01, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Granite 3.0 1b A400M Instruct"}, {"bw": 192.0, "tps": 639.1, "params_b": 0.0, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Granite 3.1 2b Instruct"}, {"bw": 192.0, "tps": 2811.05, "params_b": 0.01, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Planck-OpenLAiNN-10M"}, {"bw": 192.0, "tps": 657.11, "params_b": 0.0, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "TinyLLama"}, {"bw": 624.0, "tps": 22.73, "params_b": 14.77, "accel": "AMD Radeon RX 7800 XT", "model": "Qwen2.5 14B Instruct"}, {"bw": 192.0, "tps": 100.03, "params_b": 1.3, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Gemma 3 1b It Qat"}, {"bw": 192.0, "tps": 135.35, "params_b": 0.16, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "SmolDocling 256M Preview"}, {"bw": 192.0, "tps": 162.94, "params_b": 0.44, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Gemma 3 270m It Qat Q4_0 Unquantized"}, {"bw": 192.0, "tps": 59.31, "params_b": 0.44, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Gemma-3-270M-It-Qat"}, {"bw": 192.0, "tps": 167.77, "params_b": 0.44, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Gemma 3 270m Qat Q4_0 Unquantized"}, {"bw": 192.0, "tps": 502.04, "params_b": 0.1, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Minueza 2 96M Instruct Variant 03"}, {"bw": 192.0, "tps": 674.25, "params_b": 0.0, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "TinyStories-656K"}, {"bw": 192.0, "tps": 1732.4, "params_b": 0.0, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "TinyStories-656K"}, {"bw": 192.0, "tps": 79.88, "params_b": 0.17, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "mamba-130m-hf"}, {"bw": 192.0, "tps": 144.92, "params_b": 0.16, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "SmolLM2 135M Instruct"}, {"bw": 192.0, "tps": 907.93, "params_b": 0.0, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "llama"}, {"bw": 192.0, "tps": 218.72, "params_b": 0.51, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "h2o-danube3-500m-chat"}, {"bw": 192.0, "tps": 337.43, "params_b": 0.13, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Out_Bak_100M_2K_Code_Iter_160000"}, {"bw": 192.0, "tps": 323.16, "params_b": 0.13, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Amd Pretrained 135m 2k Slimpajama_No_Book3"}, {"bw": 192.0, "tps": 138.96, "params_b": 1.28, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "AMD OLMo 1B SFT DPO"}, {"bw": 192.0, "tps": 113.01, "params_b": 0.75, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Astral 0.6B Coder"}, {"bw": 192.0, "tps": 72.2, "params_b": 2.67, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "EXAONE 3.5 2.4B Instruct"}, {"bw": 192.0, "tps": 45.65, "params_b": 4.41, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Qwen3-4B-Instruct-2507"}, {"bw": 192.0, "tps": 122.08, "params_b": 0.75, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Qwen3-0.6B"}, {"bw": 192.0, "tps": 129.25, "params_b": 0.16, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "SmolLM2 135M Instruct"}, {"bw": 192.0, "tps": 142.09, "params_b": 0.63, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Qwen2.5 Coder 0.5B Instruct"}, {"bw": 192.0, "tps": 107.21, "params_b": 1.35, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Deepseek Coder 1.3b Instruct"}, {"bw": 192.0, "tps": 62.12, "params_b": 1.38, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Granite 3.1 1b A400M Instruct"}, {"bw": 192.0, "tps": 98.07, "params_b": 1.3, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Gemma-3-1B-It"}, {"bw": 192.0, "tps": 158.71, "params_b": 0.44, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Gemma-3-270M-It"}, {"bw": 192.0, "tps": 31.03, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 192.0, "tps": 132.42, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 120.0, "tps": 77.98, "params_b": 1.5, "accel": "Apple M4 4P+6E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 1792.0, "tps": 127.83, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5090", "model": "Llama 3.2 1B Instruct"}, {"bw": 1792.0, "tps": 43.52, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5090", "model": "Llama 3.2 1B Instruct"}, {"bw": 273.0, "tps": 117.8, "params_b": 1.5, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 936.0, "tps": 48.69, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen2.5 14B Instruct"}, {"bw": 546.0, "tps": 30.96, "params_b": 14.77, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 672.0, "tps": 36.07, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4070 Ti SUPER", "model": "Qwen2.5 14B Instruct"}, {"bw": 150.0, "tps": 12.03, "params_b": 14.77, "accel": "Apple M3 Pro 6P+6E+18GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 896.0, "tps": 91.85, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 5070 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 896.0, "tps": 259.93, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5070 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 100.0, "tps": 56.96, "params_b": 1.5, "accel": "Apple M2 4P+4E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 273.0, "tps": 18.21, "params_b": 14.77, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 273.0, "tps": 118.59, "params_b": 1.5, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 273.0, "tps": 18.26, "params_b": 14.77, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 896.0, "tps": 41.16, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 5070 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 896.0, "tps": 31.1, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 5070 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 896.0, "tps": 36.51, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 5070 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 896.0, "tps": 33.09, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 5070 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 896.0, "tps": 110.25, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5070 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 936.0, "tps": 52.12, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen2.5 14B Instruct"}, {"bw": 400.0, "tps": 20.65, "params_b": 14.77, "accel": "Apple M1 Max 8P+2E+32GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 400.0, "tps": 21.39, "params_b": 14.77, "accel": "Apple M1 Max 8P+2E+32GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 546.0, "tps": 23.6, "params_b": 14.77, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 936.0, "tps": 360.52, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3090", "model": "Llama 3.2 1B Instruct"}, {"bw": 936.0, "tps": 63.65, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen2.5 14B Instruct"}, {"bw": 936.0, "tps": 109.91, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3090", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 936.0, "tps": 109.26, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3090", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 273.0, "tps": 24.96, "params_b": 9.4, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Glm 4 9b Chat"}, {"bw": 200.0, "tps": 10.76, "params_b": 14.77, "accel": "Apple M1 Pro 6P+2E+14GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 800.0, "tps": 34.21, "params_b": 14.77, "accel": "Apple M2 Ultra 16P+8E+60GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 800.0, "tps": 59.48, "params_b": 8.03, "accel": "Apple M2 Ultra 16P+8E+60GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 800.0, "tps": 174.09, "params_b": 1.5, "accel": "Apple M2 Ultra 16P+8E+60GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 90.08, "params_b": 2.03, "accel": "NVIDIA GeForce RTX 3070", "model": "Qwen3 1.7B"}, {"bw": 448.0, "tps": 104.35, "params_b": 0.75, "accel": "NVIDIA GeForce RTX 3070", "model": "Qwen3 0.6B"}, {"bw": 448.0, "tps": 36.48, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3070", "model": "Llama3.3 8B Instruct Thinking Heretic Uncensored Claude 4.5 Opus High Reasoning"}, {"bw": 448.0, "tps": 65.93, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3070", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 448.0, "tps": 258.26, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3070", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 22.55, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 5060 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 448.0, "tps": 41.06, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 5060 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 448.0, "tps": 102.77, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5060 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 97.89, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5060 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 224.0, "tps": 34.47, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3050", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 936.0, "tps": 109.77, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3090", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 936.0, "tps": 360.1, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3090", "model": "Llama 3.2 1B Instruct"}, {"bw": 936.0, "tps": 63.73, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen2.5 14B Instruct"}, {"bw": 760.0, "tps": 308.15, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3080", "model": "Llama 3.2 1B Instruct"}, {"bw": 760.0, "tps": 89.7, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3080", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 896.0, "tps": 20.91, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 5070 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 896.0, "tps": 35.44, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 5070 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 896.0, "tps": 98.26, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5070 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 120.0, "tps": 65.56, "params_b": 1.5, "accel": "Apple M4 4P+6E+8GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 273.0, "tps": 16.13, "params_b": 14.77, "accel": "Apple M4 Pro 8P+4E+16GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 320.0, "tps": 189.28, "params_b": 1.5, "accel": "NVIDIA RTX 4000 SFF Ada Generation", "model": "Llama 3.2 1B Instruct"}, {"bw": 320.0, "tps": 44.11, "params_b": 8.03, "accel": "NVIDIA RTX 4000 SFF Ada Generation", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 320.0, "tps": 24.52, "params_b": 14.77, "accel": "NVIDIA RTX 4000 SFF Ada Generation", "model": "Qwen2.5 14B Instruct"}, {"bw": 616.0, "tps": 39.08, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 2080 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 616.0, "tps": 71.85, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 2080 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 616.0, "tps": 260.17, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 2080 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 273.0, "tps": 18.28, "params_b": 14.77, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 273.0, "tps": 119.5, "params_b": 1.5, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 273.0, "tps": 32.96, "params_b": 8.03, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 896.0, "tps": 19.96, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 5070 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 896.0, "tps": 33.29, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 5070 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 896.0, "tps": 86.8, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5070 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 68.0, "tps": 41.2, "params_b": 1.5, "accel": "Apple M1 4P+4E+8GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 896.0, "tps": 91.05, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5070 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 896.0, "tps": 19.96, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 5070 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 896.0, "tps": 33.66, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 5070 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 896.0, "tps": 87.83, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5070 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 258.2, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3070", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 152.4, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3070", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 128.73, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3070", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 257.34, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3070", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 66.11, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3070", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 448.0, "tps": 260.21, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3070", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 133.75, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3070", "model": "Llama 3.2 1B Instruct"}, {"bw": 120.0, "tps": 7.84, "params_b": 14.77, "accel": "Apple M4 4P+6E+8GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 896.0, "tps": 48.65, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 5070 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 896.0, "tps": 100.48, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5070 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 320.0, "tps": 20.24, "params_b": 8.03, "accel": "NVIDIA GeForce GTX 1080", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 320.0, "tps": 74.84, "params_b": 1.5, "accel": "NVIDIA GeForce GTX 1080", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 59.13, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 2080", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 448.0, "tps": 221.76, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 2080", "model": "Llama 3.2 1B Instruct"}, {"bw": 100.0, "tps": 60.04, "params_b": 1.5, "accel": "Apple M2 4P+4E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 100.0, "tps": 8.08, "params_b": 14.77, "accel": "Apple M2 4P+4E+10GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 936.0, "tps": 56.68, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen2.5 14B Instruct"}, {"bw": 100.0, "tps": 15.22, "params_b": 8.03, "accel": "Apple M2 4P+4E+10GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 912.0, "tps": 58.91, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3080 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 576.0, "tps": 207.48, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4090 Laptop GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 273.0, "tps": 111.19, "params_b": 1.5, "accel": "Apple M4 Pro 8P+4E+16GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 200.0, "tps": 100.49, "params_b": 1.5, "accel": "Apple M2 Pro 8P+4E+19GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 936.0, "tps": 55.51, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen2.5 14B Instruct"}, {"bw": 336.0, "tps": 156.6, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3060 Laptop GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 100.0, "tps": 12.91, "params_b": 8.03, "accel": "Apple M2 4P+4E+8GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 100.0, "tps": 52.84, "params_b": 1.5, "accel": "Apple M2 4P+4E+8GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 336.0, "tps": 160.13, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3060 Laptop GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 336.0, "tps": 44.54, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3060 Laptop GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 336.0, "tps": 45.13, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3060 Laptop GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 336.0, "tps": 1.09, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3060 Laptop GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 336.0, "tps": 153.4, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3060 Laptop GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 336.0, "tps": 45.52, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3060 Laptop GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 336.0, "tps": 45.17, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3060 Laptop GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 336.0, "tps": 162.49, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3060 Laptop GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 400.0, "tps": 17.36, "params_b": 14.77, "accel": "Apple M1 Max 8P+2E+24GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 1792.0, "tps": 84.77, "params_b": 14.77, "accel": "NVIDIA RTX PRO 6000 Blackwell Workstation Editio", "model": "Qwen2.5 14B Instruct"}, {"bw": 100.0, "tps": 57.73, "params_b": 1.5, "accel": "Apple M2 4P+4E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 120.0, "tps": 6.63, "params_b": 14.77, "accel": "Apple M4 4P+6E+8GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 336.0, "tps": 124.99, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 2060", "model": "Llama 3.2 1B Instruct"}, {"bw": 912.0, "tps": 12.22, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3080 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 3350.0, "tps": 78.35, "params_b": 1.5, "accel": "NVIDIA H100 NVL", "model": "Llama 3.2 1B Instruct"}, {"bw": 3350.0, "tps": 20.49, "params_b": 14.77, "accel": "NVIDIA H100 NVL", "model": "Qwen2.5 14B Instruct"}, {"bw": 3350.0, "tps": 88.67, "params_b": 1.5, "accel": "NVIDIA H100 NVL", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 53.87, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3080 Laptop GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 120.0, "tps": 74.57, "params_b": 1.5, "accel": "Apple M4 4P+6E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 120.0, "tps": 73.95, "params_b": 1.5, "accel": "Apple M4 4P+6E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 546.0, "tps": 29.08, "params_b": 14.77, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 546.0, "tps": 28.49, "params_b": 14.77, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 448.0, "tps": 30.47, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 5060 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 496.0, "tps": 4.47, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 2080 SUPER", "model": "Qwen2.5 14B Instruct"}, {"bw": 496.0, "tps": 59.8, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 2080 SUPER", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 496.0, "tps": 186.39, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 2080 SUPER", "model": "Llama 3.2 1B Instruct"}, {"bw": 100.0, "tps": 6.96, "params_b": 14.77, "accel": "Apple M2 4P+4E+8GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 168.0, "tps": 26.93, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3050 6GB Laptop GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 168.0, "tps": 108.26, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3050 6GB Laptop GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 936.0, "tps": 53.23, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen2.5 14B Instruct"}, {"bw": 936.0, "tps": 55.23, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen2.5 14B Instruct"}, {"bw": 936.0, "tps": 276.24, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3090", "model": "Llama 3.2 1B Instruct"}, {"bw": 336.0, "tps": 23.44, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 2060", "model": "Qwen2.5 14B Instruct"}, {"bw": 546.0, "tps": 31.08, "params_b": 14.77, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 336.0, "tps": 2.02, "params_b": 22.25, "accel": "NVIDIA GeForce RTX 2060", "model": "Mistral Small 22B ArliAI RPMax v1.1"}, {"bw": 336.0, "tps": 22.2, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 2060", "model": "Qwen2.5 14B Instruct"}, {"bw": 400.0, "tps": 21.07, "params_b": 14.77, "accel": "Apple M1 Max 8P+2E+32GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 89.0, "tps": 3.31, "params_b": 8.03, "accel": "AMD Radeon 780M Graphics", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 273.0, "tps": 17.55, "params_b": 14.77, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 273.0, "tps": 32.93, "params_b": 8.03, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 120.0, "tps": 66.16, "params_b": 1.5, "accel": "Apple M4 4P+6E+8GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 400.0, "tps": 104.74, "params_b": 1.5, "accel": "Apple M1 Max 8P+2E+24GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 89.0, "tps": 11.2, "params_b": 1.5, "accel": "AMD Radeon 780M Graphics", "model": "Llama 3.2 1B Instruct"}, {"bw": 273.0, "tps": 111.38, "params_b": 1.5, "accel": "Apple M4 Pro 8P+4E+16GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 100.0, "tps": 17.8, "params_b": 1.5, "accel": "Apple M2 4P+4E+8GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 288.0, "tps": 119.64, "params_b": 1.5, "accel": "NVIDIA RTX A2000 12GB", "model": "Llama 3.2 1B Instruct"}, {"bw": 288.0, "tps": 19.3, "params_b": 14.77, "accel": "NVIDIA RTX A2000 12GB", "model": "Qwen2.5 14B Instruct"}, {"bw": 1008.0, "tps": 115.01, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4090", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 1008.0, "tps": 65.62, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4090", "model": "Qwen2.5 14B Instruct"}, {"bw": 273.0, "tps": 32.5, "params_b": 8.03, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 120.0, "tps": 14.68, "params_b": 8.03, "accel": "Apple M4 4P+6E+10GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 448.0, "tps": 65.33, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3060 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 448.0, "tps": 199.04, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3060 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 936.0, "tps": 27.13, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen2.5 14B Instruct"}, {"bw": 864.0, "tps": 62.07, "params_b": 1.5, "accel": "NVIDIA L40S-8Q", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 36.54, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 5060 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 546.0, "tps": 26.41, "params_b": 14.77, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 400.0, "tps": 148.39, "params_b": 1.5, "accel": "Apple M3 Max 12P+4E+40GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 936.0, "tps": 48.72, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen2.5 14B Instruct"}, {"bw": 256.0, "tps": 41.01, "params_b": 4.41, "accel": "NVIDIA GeForce RTX 4070 Laptop GPU", "model": "Qwen3 4B Thinking 2507"}, {"bw": 256.0, "tps": 3.84, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4070 Laptop GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 273.0, "tps": 18.22, "params_b": 14.77, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 273.0, "tps": 32.47, "params_b": 8.03, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 273.0, "tps": 18.2, "params_b": 14.77, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 273.0, "tps": 33.14, "params_b": 8.03, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 256.0, "tps": 40.85, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4070 Laptop GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 256.0, "tps": 40.9, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4070 Laptop GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 546.0, "tps": 184.88, "params_b": 1.5, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 546.0, "tps": 24.78, "params_b": 14.77, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 68.0, "tps": 13.39, "params_b": 8.03, "accel": "Apple M1 4P+4E+7GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 448.0, "tps": 127.68, "params_b": 1.5, "accel": "NVIDIA RTX A4000", "model": "Llama 3.2 1B Instruct"}, {"bw": 120.0, "tps": 11.7, "params_b": 8.03, "accel": "Apple M4 4P+6E+10GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 120.0, "tps": 76.01, "params_b": 1.5, "accel": "Apple M4 4P+6E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 273.0, "tps": 120.84, "params_b": 1.5, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 150.0, "tps": 12.1, "params_b": 14.77, "accel": "Apple M3 Pro 6P+6E+18GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 717.0, "tps": 13.44, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4080", "model": "Qwen2.5 14B Instruct"}, {"bw": 717.0, "tps": 13.54, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4080", "model": "Qwen2.5 14B Instruct"}, {"bw": 360.0, "tps": 27.4, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3060", "model": "Qwen2.5 14B Instruct"}, {"bw": 273.0, "tps": 30.48, "params_b": 8.03, "accel": "Apple M4 Pro 8P+4E+16GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 504.0, "tps": 218.42, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4070 SUPER", "model": "Llama 3.2 1B Instruct"}, {"bw": 346.0, "tps": 15.73, "params_b": 14.77, "accel": "Tesla P40", "model": "Qwen2.5 14B Instruct"}, {"bw": 346.0, "tps": 29.32, "params_b": 30.53, "accel": "Tesla P40", "model": "Qwen3-Coder-30B-A3B-Instruct-1M"}, {"bw": 1008.0, "tps": 431.54, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4090", "model": "Llama 3.2 1B Instruct"}, {"bw": 1008.0, "tps": 125.22, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4090", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 1008.0, "tps": 72.51, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4090", "model": "Qwen2.5 14B Instruct"}, {"bw": 448.0, "tps": 55.55, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 2070", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 256.0, "tps": 103.26, "params_b": 1.5, "accel": "NVIDIA GeForce GTX 1070", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 184.09, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 2070", "model": "Llama 3.2 1B Instruct"}, {"bw": 200.0, "tps": 99.52, "params_b": 1.5, "accel": "Apple M2 Pro 8P+4E+19GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 100.0, "tps": 13.98, "params_b": 8.03, "accel": "Apple M2 4P+4E+10GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 128.0, "tps": 20.63, "params_b": 1.5, "accel": "NVIDIA GeForce GTX 1650", "model": "Llama 3.2 1B Instruct"}, {"bw": 896.0, "tps": 44.05, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5070 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 45.44, "params_b": 8.03, "accel": "NVIDIA RTX A4000", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 448.0, "tps": 117.72, "params_b": 1.5, "accel": "NVIDIA RTX A4000", "model": "Llama 3.2 1B Instruct"}, {"bw": 120.0, "tps": 9.24, "params_b": 14.77, "accel": "Apple M4 4P+6E+10GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 864.0, "tps": 351.57, "params_b": 1.5, "accel": "NVIDIA L40S", "model": "Llama 3.2 1B Instruct"}, {"bw": 760.0, "tps": 92.11, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3080", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 448.0, "tps": 26.55, "params_b": 14.77, "accel": "NVIDIA RTX A4000", "model": "Qwen2.5 14B Instruct"}, {"bw": 448.0, "tps": 135.27, "params_b": 1.5, "accel": "NVIDIA RTX A4000", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 46.01, "params_b": 8.03, "accel": "NVIDIA RTX A4000", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 360.0, "tps": 49.62, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3060", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 360.0, "tps": 156.76, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3060", "model": "Llama 3.2 1B Instruct"}, {"bw": 288.0, "tps": 42.81, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4060 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 448.0, "tps": 42.61, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3060 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 448.0, "tps": 59.74, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3070", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 448.0, "tps": 215.99, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3070", "model": "Llama 3.2 1B Instruct"}, {"bw": 546.0, "tps": 20.24, "params_b": 14.77, "accel": "Apple M4 Max 10P+4E+32GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 546.0, "tps": 165.48, "params_b": 1.5, "accel": "Apple M4 Max 10P+4E+32GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 400.0, "tps": 37.73, "params_b": 8.03, "accel": "Apple M1 Max 8P+2E+32GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 400.0, "tps": 124.47, "params_b": 1.5, "accel": "Apple M1 Max 8P+2E+32GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 400.0, "tps": 21.01, "params_b": 14.77, "accel": "Apple M1 Max 8P+2E+32GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 504.0, "tps": 36.03, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4070", "model": "Qwen2.5 14B Instruct"}, {"bw": 864.0, "tps": 3.45, "params_b": 14.77, "accel": "NVIDIA L40S-8Q", "model": "Qwen2.5 14B Instruct"}, {"bw": 864.0, "tps": 62.76, "params_b": 1.5, "accel": "NVIDIA L40S-8Q", "model": "Llama 3.2 1B Instruct"}, {"bw": 200.0, "tps": 10.78, "params_b": 14.77, "accel": "Apple M1 Pro 6P+2E+14GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 200.0, "tps": 20.18, "params_b": 8.03, "accel": "Apple M1 Pro 6P+2E+14GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 200.0, "tps": 72.47, "params_b": 1.5, "accel": "Apple M1 Pro 6P+2E+14GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 120.0, "tps": 9.63, "params_b": 14.77, "accel": "Apple M4 4P+6E+10GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 819.0, "tps": 35.78, "params_b": 14.77, "accel": "Apple M3 Ultra 24P+8E+80GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 120.0, "tps": 17.73, "params_b": 8.03, "accel": "Apple M4 4P+6E+10GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 819.0, "tps": 62.73, "params_b": 8.03, "accel": "Apple M3 Ultra 24P+8E+80GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 120.0, "tps": 77.14, "params_b": 1.5, "accel": "Apple M4 4P+6E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 819.0, "tps": 178.47, "params_b": 1.5, "accel": "Apple M3 Ultra 24P+8E+80GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 128.0, "tps": 23.8, "params_b": 1.5, "accel": "NVIDIA GeForce GTX 1650", "model": "Llama 3.2 1B Instruct"}, {"bw": 864.0, "tps": 351.55, "params_b": 1.5, "accel": "NVIDIA L40S", "model": "Llama 3.2 1B Instruct"}, {"bw": 936.0, "tps": 26.97, "params_b": 30.53, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen3-30B-A3B-Instruct-2507"}, {"bw": 736.0, "tps": 57.36, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4080 SUPER", "model": "Qwen2.5 14B Instruct"}, {"bw": 272.0, "tps": 40.66, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4060", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 100.0, "tps": 68.18, "params_b": 1.5, "accel": "Apple M3 4P+4E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 53.07, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 5060 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 448.0, "tps": 30.52, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 5060 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 448.0, "tps": 189.72, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5060 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 120.0, "tps": 9.2, "params_b": 14.77, "accel": "Apple M4 4P+6E+10GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 448.0, "tps": 91.87, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5060 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 800.0, "tps": 131.55, "params_b": 1.5, "accel": "Radeon RX 7900 XT", "model": "Llama 3.2 1B Instruct"}, {"bw": 800.0, "tps": 131.46, "params_b": 1.5, "accel": "Radeon RX 7900 XT", "model": "Llama 3.2 1B Instruct"}, {"bw": 120.0, "tps": 16.89, "params_b": 8.03, "accel": "Apple M4 4P+6E+10GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 120.0, "tps": 75.24, "params_b": 1.5, "accel": "Apple M4 4P+6E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 150.0, "tps": 11.21, "params_b": 14.77, "accel": "Apple M3 Pro 6P+6E+18GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 150.0, "tps": 20.81, "params_b": 8.03, "accel": "Apple M3 Pro 6P+6E+18GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 672.0, "tps": 74.84, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 5070", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 150.0, "tps": 85.63, "params_b": 1.5, "accel": "Apple M3 Pro 6P+6E+18GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 400.0, "tps": 105.8, "params_b": 1.5, "accel": "Apple M1 Max 8P+2E+24GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 273.0, "tps": 30.56, "params_b": 8.03, "accel": "Apple M4 Pro 8P+4E+16GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 192.0, "tps": 93.9, "params_b": 1.5, "accel": "Quadro P4000", "model": "Llama 3.2 1B Instruct"}, {"bw": 192.0, "tps": 94.25, "params_b": 1.5, "accel": "Quadro P4000", "model": "Llama 3.2 1B Instruct"}, {"bw": 273.0, "tps": 32.7, "params_b": 8.03, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 1792.0, "tps": 27.43, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 5090", "model": "Qwen2.5 14B Instruct"}, {"bw": 346.0, "tps": 40.86, "params_b": 6.74, "accel": "Tesla P40", "model": "LLaMA v2"}, {"bw": 448.0, "tps": 196.28, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 2070", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 51.73, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 2070", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 224.0, "tps": 20.84, "params_b": 8.03, "accel": "AMD Radeon RX 6600", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 273.0, "tps": 16.48, "params_b": 14.77, "accel": "Apple M4 Pro 8P+4E+16GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 153.0, "tps": 97.08, "params_b": 1.5, "accel": "Apple M5 4P+6E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 288.0, "tps": 18.87, "params_b": 14.77, "accel": "NVIDIA RTX A2000 12GB", "model": "Qwen2.5 14B Instruct"}, {"bw": 1792.0, "tps": 25.89, "params_b": 14.77, "accel": "NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation ", "model": "Qwen2.5 14B Instruct"}, {"bw": 1792.0, "tps": 46.91, "params_b": 8.03, "accel": "NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation ", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 1792.0, "tps": 108.64, "params_b": 1.5, "accel": "NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation ", "model": "Llama 3.2 1B Instruct"}, {"bw": 1792.0, "tps": 46.86, "params_b": 8.03, "accel": "NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation ", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 200.0, "tps": 24.34, "params_b": 8.03, "accel": "Apple M2 Pro 6P+4E+16GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 1008.0, "tps": 68.84, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4090", "model": "Qwen2.5 14B Instruct"}, {"bw": 1008.0, "tps": 119.61, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4090", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 1008.0, "tps": 381.74, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4090", "model": "Llama 3.2 1B Instruct"}, {"bw": 89.0, "tps": 5.91, "params_b": 1.5, "accel": "AMD Radeon 780M Graphics", "model": "Llama 3.2 1B Instruct"}, {"bw": 484.0, "tps": 19.43, "params_b": 14.77, "accel": "NVIDIA GeForce GTX 1080 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 120.0, "tps": 16.57, "params_b": 8.03, "accel": "Apple M4 4P+6E+10GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 273.0, "tps": 17.89, "params_b": 14.77, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 273.0, "tps": 32.72, "params_b": 8.03, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 273.0, "tps": 117.11, "params_b": 1.5, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 273.0, "tps": 10.93, "params_b": 14.77, "accel": "Apple M4 Pro 8P+4E+16GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 1792.0, "tps": 114.07, "params_b": 1.5, "accel": "NVIDIA RTX PRO 6000 Blackwell Workstation Editio", "model": "Llama 3.2 1B Instruct"}, {"bw": 400.0, "tps": 25.48, "params_b": 14.77, "accel": "Apple M3 Max 12P+4E+40GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 200.0, "tps": 23.0, "params_b": 8.19, "accel": "Apple M2 Pro 6P+4E+16GPU", "model": "Qwen3 8B"}, {"bw": 200.0, "tps": 13.91, "params_b": 14.77, "accel": "Apple M2 Pro 6P+4E+16GPU", "model": "Qwen3 14B"}, {"bw": 200.0, "tps": 23.09, "params_b": 8.19, "accel": "Apple M2 Pro 6P+4E+16GPU", "model": "DeepSeek R1 0528 Qwen3 8B"}, {"bw": 200.0, "tps": 13.37, "params_b": 14.77, "accel": "Apple M2 Pro 6P+4E+16GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 200.0, "tps": 91.47, "params_b": 1.5, "accel": "Apple M2 Pro 6P+4E+16GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 484.0, "tps": 20.56, "params_b": 14.77, "accel": "NVIDIA GeForce GTX 1080 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 484.0, "tps": 146.01, "params_b": 1.5, "accel": "NVIDIA GeForce GTX 1080 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 546.0, "tps": 29.56, "params_b": 14.77, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 546.0, "tps": 52.41, "params_b": 8.03, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 546.0, "tps": 177.29, "params_b": 1.5, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 546.0, "tps": 30.77, "params_b": 14.77, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 1792.0, "tps": 121.67, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5090", "model": "Llama 3.2 1B Instruct"}, {"bw": 68.0, "tps": 38.29, "params_b": 1.5, "accel": "Apple M1 4P+4E+7GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 273.0, "tps": 16.67, "params_b": 14.77, "accel": "Apple M4 Pro 8P+4E+16GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 288.0, "tps": 19.5, "params_b": 14.77, "accel": "NVIDIA RTX A2000 12GB", "model": "Qwen2.5 14B Instruct"}, {"bw": 224.0, "tps": 56.24, "params_b": 1.5, "accel": "Radeon RX 5500 XT", "model": "Llama 3.2 1B Instruct"}, {"bw": 864.0, "tps": 361.41, "params_b": 1.5, "accel": "NVIDIA L40S", "model": "Llama 3.2 1B Instruct"}, {"bw": 280.0, "tps": 73.9, "params_b": 1.3, "accel": "AMD Radeon RX 6650 XT", "model": "Gemma 3 1b"}, {"bw": 224.0, "tps": 5.17, "params_b": 8.03, "accel": "AMD Radeon RX 6600", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 1792.0, "tps": 359.64, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5090", "model": "Llama 3.2 1B Instruct"}, {"bw": 960.0, "tps": 405.71, "params_b": 1.5, "accel": "NVIDIA RTX 6000 Ada Generation", "model": "Llama 3.2 1B Instruct"}, {"bw": 192.0, "tps": 4.75, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3050 Laptop GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 1792.0, "tps": 21.53, "params_b": 14.77, "accel": "NVIDIA RTX PRO 6000 Blackwell Server Edition", "model": "Qwen2.5 14B Instruct"}, {"bw": 1792.0, "tps": 104.9, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5090", "model": "Llama 3.2 1B Instruct"}, {"bw": 100.0, "tps": 39.07, "params_b": 1.5, "accel": "Apple M2 4P+4E+8GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 546.0, "tps": 29.32, "params_b": 14.77, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 546.0, "tps": 53.12, "params_b": 8.03, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 546.0, "tps": 179.23, "params_b": 1.5, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 800.0, "tps": 153.79, "params_b": 1.5, "accel": "Apple M1 Ultra 16P+4E+64GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 1792.0, "tps": 109.51, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5090", "model": "Llama 3.2 1B Instruct"}, {"bw": 546.0, "tps": 180.19, "params_b": 1.5, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 800.0, "tps": 54.43, "params_b": 8.03, "accel": "Apple M1 Ultra 16P+4E+64GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 800.0, "tps": 148.35, "params_b": 1.5, "accel": "Apple M1 Ultra 16P+4E+64GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 800.0, "tps": 31.51, "params_b": 14.77, "accel": "Apple M1 Ultra 16P+4E+64GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 140.0, "tps": 58.31, "params_b": 1.5, "accel": "Quadro P2000", "model": "Llama 3.2 1B Instruct"}, {"bw": 273.0, "tps": 17.9, "params_b": 14.77, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 800.0, "tps": 54.18, "params_b": 8.03, "accel": "Apple M1 Ultra 16P+4E+64GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 336.0, "tps": 14.78, "params_b": 0.97, "accel": "NVIDIA GeForce GTX 1660 SUPER", "model": "Qwen3 Coder 0.6B 0.9B 7 1bR5 Nobs F16"}, {"bw": 336.0, "tps": 63.76, "params_b": 0.63, "accel": "NVIDIA GeForce GTX 1660 SUPER", "model": "Qwen2.5 0.5B Instruct"}, {"bw": 192.0, "tps": 8.38, "params_b": 1.78, "accel": "NVIDIA GeForce GTX 1060 3GB", "model": "DeepSeek R1 Distill Qwen 1.5B"}, {"bw": 336.0, "tps": 28.53, "params_b": 1.1, "accel": "NVIDIA GeForce GTX 1660 SUPER", "model": "tinyllama_tinyllama-1.1b-chat-v1.0"}, {"bw": 192.0, "tps": 71.41, "params_b": 1.1, "accel": "NVIDIA GeForce GTX 1060 3GB", "model": "tinyllama_tinyllama-1.1b-chat-v1.0"}, {"bw": 192.0, "tps": 98.73, "params_b": 0.63, "accel": "NVIDIA GeForce GTX 1060 3GB", "model": "Qwen2.5 0.5B Instruct"}, {"bw": 336.0, "tps": 63.09, "params_b": 0.62, "accel": "NVIDIA GeForce GTX 1660 SUPER", "model": "Qwen1.5-0.5B-Chat-AWQ-fp16"}, {"bw": 336.0, "tps": 7.53, "params_b": 1.78, "accel": "NVIDIA GeForce GTX 1660 SUPER", "model": "DeepSeek R1 Distill Qwen 1.5B"}, {"bw": 336.0, "tps": 14.38, "params_b": 3.61, "accel": "NVIDIA GeForce GTX 1660 SUPER", "model": "Llama 3.2 3B Instruct"}, {"bw": 546.0, "tps": 181.52, "params_b": 1.5, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 546.0, "tps": 31.06, "params_b": 14.77, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 336.0, "tps": 63.93, "params_b": 0.63, "accel": "NVIDIA GeForce GTX 1660 SUPER", "model": "Qwen2.5 0.5B Instruct"}, {"bw": 336.0, "tps": 28.47, "params_b": 1.1, "accel": "NVIDIA GeForce GTX 1660 SUPER", "model": "tinyllama_tinyllama-1.1b-chat-v1.0"}, {"bw": 336.0, "tps": 63.93, "params_b": 0.63, "accel": "NVIDIA GeForce GTX 1660 SUPER", "model": "Qwen2.5 0.5B Instruct"}, {"bw": 336.0, "tps": 15.72, "params_b": 7.62, "accel": "NVIDIA GeForce GTX 1660 SUPER", "model": "Qwen2.5 Coder 7B Instruct"}, {"bw": 200.0, "tps": 10.4, "params_b": 14.77, "accel": "Apple M1 Pro 8P+2E+14GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 336.0, "tps": 41.03, "params_b": 7.62, "accel": "NVIDIA GeForce RTX 2060", "model": "Qwen2.5 Coder 7B Instruct"}, {"bw": 336.0, "tps": 38.08, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 2060", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 336.0, "tps": 125.37, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 2060", "model": "Llama 3.2 1B Instruct"}, {"bw": 336.0, "tps": 128.22, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 2060", "model": "Llama 3.2 1B Instruct"}, {"bw": 336.0, "tps": 9.98, "params_b": 8.03, "accel": "NVIDIA GeForce GTX 1660 SUPER", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 336.0, "tps": 36.51, "params_b": 1.5, "accel": "NVIDIA GeForce GTX 1660 SUPER", "model": "Llama 3.2 1B Instruct"}, {"bw": 100.0, "tps": 1563.57, "params_b": 14.77, "accel": "Apple M3 4P+4E+10GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 400.0, "tps": 46.02, "params_b": 8.03, "accel": "Apple M3 Max 12P+4E+40GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 120.0, "tps": 9.84, "params_b": 14.77, "accel": "Apple M4 4P+6E+10GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 400.0, "tps": 25.5, "params_b": 14.77, "accel": "Apple M3 Max 12P+4E+40GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 504.0, "tps": 41.36, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4070 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 120.0, "tps": 68.47, "params_b": 1.5, "accel": "Apple M4 4P+6E", "model": "Llama 3.2 1B Instruct"}, {"bw": 120.0, "tps": 78.56, "params_b": 1.5, "accel": "Apple M4 4P+6E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 400.0, "tps": 11.87, "params_b": 14.77, "accel": "Apple M1 Max 8P+2E+32GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 400.0, "tps": 23.78, "params_b": 14.77, "accel": "Apple M2 Max 8P+4E+38GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 200.0, "tps": 22.66, "params_b": 8.03, "accel": "Apple M1 Pro 8P+2E+16GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 200.0, "tps": 82.25, "params_b": 1.5, "accel": "Apple M1 Pro 8P+2E+16GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 960.0, "tps": 150.91, "params_b": 1.5, "accel": "AMD Radeon RX 7900 XTX", "model": "Llama 3.2 1B Instruct"}, {"bw": 400.0, "tps": 13.78, "params_b": 14.77, "accel": "Apple M1 Max 8P+2E+24GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 400.0, "tps": 105.08, "params_b": 1.5, "accel": "Apple M1 Max 8P+2E+24GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 273.0, "tps": 32.83, "params_b": 8.03, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 100.0, "tps": 17.88, "params_b": 8.03, "accel": "Apple M2 4P+4E+8GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 1792.0, "tps": 26.09, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 5090", "model": "Qwen2.5 14B Instruct"}, {"bw": 1792.0, "tps": 105.46, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5090", "model": "Llama 3.2 1B Instruct"}, {"bw": 100.0, "tps": 6.01, "params_b": 14.77, "accel": "Apple M3 4P+4E+10GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 1792.0, "tps": 49.55, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 5090", "model": "Qwen2.5 14B Instruct"}, {"bw": 1792.0, "tps": 158.5, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5090", "model": "Llama 3.2 1B Instruct"}, {"bw": 100.0, "tps": 66.98, "params_b": 1.5, "accel": "Apple M3 4P+4E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 273.0, "tps": 18.02, "params_b": 14.77, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 512.0, "tps": 18.9, "params_b": 14.77, "accel": "AMD Radeon RX 6800", "model": "Qwen2.5 14B Instruct"}, {"bw": 288.0, "tps": 5.46, "params_b": 8.03, "accel": "AMD Radeon RX 7600", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 1008.0, "tps": 51.75, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4090", "model": "Qwen2.5 14B Instruct"}, {"bw": 400.0, "tps": 132.99, "params_b": 1.5, "accel": "Apple M3 Max 10P+4E+30GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 120.0, "tps": 74.37, "params_b": 1.5, "accel": "Apple M4 4P+6E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 62.65, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 5060 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 68.0, "tps": 40.61, "params_b": 1.5, "accel": "Apple M1 4P+4E+8GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 273.0, "tps": 32.73, "params_b": 8.03, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 432.0, "tps": 17.35, "params_b": 14.77, "accel": "NVIDIA RTX 3500 Ada Generation Laptop GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 432.0, "tps": 26.15, "params_b": 8.03, "accel": "NVIDIA RTX 3500 Ada Generation Laptop GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 273.0, "tps": 17.79, "params_b": 14.77, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 256.0, "tps": 90.1, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4070 Laptop GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 416.0, "tps": 38.8, "params_b": 8.03, "accel": "Quadro RTX 4000", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 936.0, "tps": 65.39, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3090", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 546.0, "tps": 31.22, "params_b": 14.77, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 936.0, "tps": 59.42, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen2.5 14B Instruct"}, {"bw": 273.0, "tps": 18.03, "params_b": 14.77, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 1792.0, "tps": 14.35, "params_b": 14.77, "accel": "NVIDIA RTX PRO 6000 Blackwell Server Edition", "model": "Qwen2.5 14B Instruct"}, {"bw": 546.0, "tps": 56.13, "params_b": 8.03, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 546.0, "tps": 181.39, "params_b": 1.5, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 717.0, "tps": 82.82, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4080", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 717.0, "tps": 352.44, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4080", "model": "Llama 3.2 1B Instruct"}, {"bw": 717.0, "tps": 92.07, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4080", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 1792.0, "tps": 45.7, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5090", "model": "Llama 3.2 1B Instruct"}, {"bw": 768.0, "tps": 307.22, "params_b": 1.5, "accel": "NVIDIA RTX A5000", "model": "Llama 3.2 1B Instruct"}, {"bw": 768.0, "tps": 88.25, "params_b": 8.03, "accel": "NVIDIA RTX A5000", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 768.0, "tps": 50.01, "params_b": 14.77, "accel": "NVIDIA RTX A5000", "model": "Qwen2.5 14B Instruct"}, {"bw": 936.0, "tps": 58.3, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen2.5 14B Instruct"}, {"bw": 936.0, "tps": 100.85, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3090", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 936.0, "tps": 341.86, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3090", "model": "Llama 3.2 1B Instruct"}, {"bw": 672.0, "tps": 27.17, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4070 Ti SUPER", "model": "Qwen2.5 14B Instruct"}, {"bw": 672.0, "tps": 49.86, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4070 Ti SUPER", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 672.0, "tps": 143.99, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4070 Ti SUPER", "model": "Llama 3.2 1B Instruct"}, {"bw": 1792.0, "tps": 45.01, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5090", "model": "Llama 3.2 1B Instruct"}, {"bw": 1792.0, "tps": 45.1, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5090", "model": "Llama 3.2 1B Instruct"}, {"bw": 273.0, "tps": 17.8, "params_b": 14.77, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 273.0, "tps": 115.36, "params_b": 1.5, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 112.0, "tps": 41.51, "params_b": 1.5, "accel": "NVIDIA GeForce GTX 1050", "model": "Llama 3.2 1B Instruct"}, {"bw": 400.0, "tps": 13.92, "params_b": 14.77, "accel": "Apple M3 Max 12P+4E+40GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 400.0, "tps": 25.75, "params_b": 8.03, "accel": "Apple M3 Max 12P+4E+40GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 400.0, "tps": 108.58, "params_b": 1.5, "accel": "Apple M3 Max 12P+4E+40GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 5.32, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 2080", "model": "Qwen2.5 14B Instruct"}, {"bw": 448.0, "tps": 57.08, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 2080", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 448.0, "tps": 188.17, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 2080", "model": "Llama 3.2 1B Instruct"}, {"bw": 400.0, "tps": 14.49, "params_b": 14.77, "accel": "Apple M2 Max 8P+4E+30GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 400.0, "tps": 13.68, "params_b": 14.77, "accel": "Apple M3 Max 12P+4E+40GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 400.0, "tps": 25.14, "params_b": 8.03, "accel": "Apple M3 Max 12P+4E+40GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 400.0, "tps": 105.37, "params_b": 1.5, "accel": "Apple M3 Max 12P+4E+40GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 416.0, "tps": 41.19, "params_b": 8.03, "accel": "Quadro RTX 4000", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 448.0, "tps": 52.29, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3080 Laptop GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 819.0, "tps": 62.62, "params_b": 8.03, "accel": "Apple M3 Ultra 24P+8E+80GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 819.0, "tps": 35.68, "params_b": 14.77, "accel": "Apple M3 Ultra 24P+8E+80GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 448.0, "tps": 28.78, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3080 Laptop GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 819.0, "tps": 179.21, "params_b": 1.5, "accel": "Apple M3 Ultra 24P+8E+80GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 157.29, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3080 Laptop GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 100.0, "tps": 21.34, "params_b": 1.5, "accel": "Apple M2 4P+4E+8GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 336.0, "tps": 43.55, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3060 Laptop GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 336.0, "tps": 181.41, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3060 Laptop GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 288.0, "tps": 28.73, "params_b": 8.03, "accel": "NVIDIA RTX A2000", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 288.0, "tps": 116.51, "params_b": 1.5, "accel": "NVIDIA RTX A2000", "model": "Llama 3.2 1B Instruct"}, {"bw": 360.0, "tps": 28.61, "params_b": 14.77, "accel": "NVIDIA RTX 4000 Ada Generation", "model": "Qwen2.5 14B Instruct"}, {"bw": 360.0, "tps": 50.58, "params_b": 8.03, "accel": "NVIDIA RTX 4000 Ada Generation", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 360.0, "tps": 183.42, "params_b": 1.5, "accel": "NVIDIA RTX 4000 Ada Generation", "model": "Llama 3.2 1B Instruct"}, {"bw": 200.0, "tps": 21.02, "params_b": 8.03, "accel": "Apple M1 Pro 8P+2E+16GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 200.0, "tps": 75.64, "params_b": 1.5, "accel": "Apple M1 Pro 8P+2E+16GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 288.0, "tps": 85.11, "params_b": 0.75, "accel": "NVIDIA RTX A2000", "model": "Qwen3 Mini Moe"}, {"bw": 504.0, "tps": 43.58, "params_b": 12.77, "accel": "NVIDIA GeForce RTX 4070 Ti", "model": "gemma-3-12b-it-codeforces-SFT"}, {"bw": 504.0, "tps": 39.61, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4070 Ti", "model": "Local_Pretrained_Model"}, {"bw": 504.0, "tps": 40.37, "params_b": 8.37, "accel": "NVIDIA GeForce RTX 4070 Ti", "model": "Granite 3.3 8b Instruct"}, {"bw": 504.0, "tps": 63.05, "params_b": 6.74, "accel": "NVIDIA GeForce RTX 4070 Ti", "model": "codellama_codellama-7b-instruct-hf"}, {"bw": 504.0, "tps": 54.23, "params_b": 8.02, "accel": "NVIDIA GeForce RTX 4070 Ti", "model": "Ministral 8B Instruct 2410 TEST"}, {"bw": 504.0, "tps": 45.21, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4070 Ti", "model": "Cogito v1 Preview Llama 8B"}, {"bw": 1008.0, "tps": 65.51, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3090 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 546.0, "tps": 32.17, "params_b": 14.77, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 546.0, "tps": 31.33, "params_b": 14.77, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 68.0, "tps": 39.07, "params_b": 1.5, "accel": "Apple M1 4P+4E+8GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 224.0, "tps": 123.01, "params_b": 1.5, "accel": "NVIDIA RTX A1000 Laptop GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 200.0, "tps": 21.97, "params_b": 8.03, "accel": "Apple M1 Pro 8P+2E+16GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 200.0, "tps": 78.8, "params_b": 1.5, "accel": "Apple M1 Pro 8P+2E+16GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 224.0, "tps": 172.07, "params_b": 1.1, "accel": "NVIDIA RTX 2000 Ada Generation", "model": ".."}, {"bw": 120.0, "tps": 65.78, "params_b": 1.5, "accel": "Apple M4 4P+6E+8GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 120.0, "tps": 65.5, "params_b": 1.5, "accel": "Apple M4 4P+6E+8GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 504.0, "tps": 41.78, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4070 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 336.0, "tps": 38.4, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3060 Laptop GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 336.0, "tps": 159.87, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3060 Laptop GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 384.0, "tps": 100.82, "params_b": 1.5, "accel": "AMD Radeon RX 6700 XT", "model": "Llama 3.2 1B Instruct"}, {"bw": 768.0, "tps": 45.47, "params_b": 8.03, "accel": "NVIDIA RTX A5000 Laptop GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 768.0, "tps": 26.72, "params_b": 14.77, "accel": "NVIDIA RTX A5000 Laptop GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 768.0, "tps": 29.16, "params_b": 12.77, "accel": "NVIDIA RTX A5000 Laptop GPU", "model": "Gemma 3 12b It"}, {"bw": 768.0, "tps": 201.95, "params_b": 1.5, "accel": "NVIDIA RTX A5000 Laptop GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 400.0, "tps": 109.85, "params_b": 0.75, "accel": "Apple M1 Max 8P+2E+32GPU", "model": "Qwen3 Embedding 0.6b"}, {"bw": 400.0, "tps": 32.61, "params_b": 8.37, "accel": "Apple M1 Max 8P+2E+32GPU", "model": "Granite 3.2 8b Instruct"}, {"bw": 448.0, "tps": 35.05, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 5060 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 112.0, "tps": 27.27, "params_b": 1.5, "accel": "NVIDIA GeForce GTX 1050", "model": "Llama 3.2 1B Instruct"}, {"bw": 273.0, "tps": 17.89, "params_b": 14.77, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 273.0, "tps": 32.59, "params_b": 8.03, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 273.0, "tps": 117.12, "params_b": 1.5, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 819.0, "tps": 63.3, "params_b": 8.03, "accel": "Apple M3 Ultra 24P+8E+80GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 819.0, "tps": 177.9, "params_b": 1.5, "accel": "Apple M3 Ultra 24P+8E+80GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 819.0, "tps": 36.71, "params_b": 14.77, "accel": "Apple M3 Ultra 24P+8E+80GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 800.0, "tps": 21.28, "params_b": 14.66, "accel": "NVIDIA RTX 5000 Ada Generation Laptop GPU", "model": "Phi 4"}, {"bw": 800.0, "tps": 16.39, "params_b": 14.77, "accel": "NVIDIA RTX 5000 Ada Generation Laptop GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 100.0, "tps": 13.69, "params_b": 8.03, "accel": "Apple M3 4P+4E+10GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 1792.0, "tps": 44.86, "params_b": 8.03, "accel": "NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation ", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 1792.0, "tps": 103.92, "params_b": 1.5, "accel": "NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation ", "model": "Llama 3.2 1B Instruct"}, {"bw": 512.0, "tps": 18.48, "params_b": 14.77, "accel": "AMD Radeon RX 6800", "model": "Qwen2.5 14B Instruct"}, {"bw": 512.0, "tps": 30.88, "params_b": 8.03, "accel": "AMD Radeon RX 6800", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 512.0, "tps": 95.62, "params_b": 1.5, "accel": "AMD Radeon RX 6800", "model": "Llama 3.2 1B Instruct"}, {"bw": 1792.0, "tps": 24.87, "params_b": 14.77, "accel": "NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation ", "model": "Qwen2.5 14B Instruct"}, {"bw": 272.0, "tps": 23.41, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4060", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 272.0, "tps": 77.01, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4060", "model": "Llama 3.2 1B Instruct"}, {"bw": 1792.0, "tps": 21.49, "params_b": 14.77, "accel": "NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation ", "model": "Qwen2.5 14B Instruct"}, {"bw": 546.0, "tps": 55.58, "params_b": 8.03, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 546.0, "tps": 178.05, "params_b": 1.5, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 546.0, "tps": 178.88, "params_b": 1.5, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 1792.0, "tps": 122.24, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5090", "model": "Llama 3.2 1B Instruct"}, {"bw": 1792.0, "tps": 45.28, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5090", "model": "Llama 3.2 1B Instruct"}, {"bw": 192.0, "tps": 37.64, "params_b": 2.78, "accel": "NVIDIA GeForce GTX 1060 6GB", "model": "Phi2"}, {"bw": 192.0, "tps": 59.09, "params_b": 3.03, "accel": "NVIDIA GeForce GTX 1060 6GB", "model": "gemma-2b-it"}, {"bw": 192.0, "tps": 33.98, "params_b": 3.61, "accel": "NVIDIA GeForce GTX 1060 6GB", "model": "Llama 3.2 3B Instruct"}, {"bw": 192.0, "tps": 77.89, "params_b": 1.5, "accel": "NVIDIA GeForce GTX 1060 6GB", "model": "Llama 3.2 1B Instruct"}, {"bw": 192.0, "tps": 66.77, "params_b": 1.5, "accel": "NVIDIA GeForce GTX 1060 6GB", "model": "Llama 3.2 1B Instruct"}, {"bw": 192.0, "tps": 86.47, "params_b": 1.1, "accel": "NVIDIA GeForce GTX 1060 6GB", "model": "TinyLlama"}, {"bw": 192.0, "tps": 24.61, "params_b": 4.41, "accel": "NVIDIA GeForce GTX 1060 6GB", "model": "Qwen3 4B Thinking 2507"}, {"bw": 192.0, "tps": 50.96, "params_b": 2.03, "accel": "NVIDIA GeForce GTX 1060 6GB", "model": "Qwen3 1.7B"}, {"bw": 192.0, "tps": 62.48, "params_b": 1.78, "accel": "NVIDIA GeForce GTX 1060 6GB", "model": "DeepSeek R1 Distill Qwen 1.5B"}, {"bw": 192.0, "tps": 33.97, "params_b": 3.61, "accel": "NVIDIA GeForce GTX 1060 6GB", "model": "Llama 3.2 3B Instruct"}, {"bw": 192.0, "tps": 17.88, "params_b": 7.25, "accel": "NVIDIA GeForce GTX 1060 6GB", "model": "Mistral-7B-Instruct-v0.3"}, {"bw": 192.0, "tps": 19.12, "params_b": 7.62, "accel": "NVIDIA GeForce GTX 1060 6GB", "model": "DeepSeek R1 Distill Qwen 7B"}, {"bw": 192.0, "tps": 27.3, "params_b": 3.82, "accel": "NVIDIA GeForce GTX 1060 6GB", "model": "Phi3"}, {"bw": 192.0, "tps": 27.87, "params_b": 3.61, "accel": "NVIDIA GeForce GTX 1060 6GB", "model": "Llama 3.2 3B Instruct"}, {"bw": 192.0, "tps": 78.05, "params_b": 1.5, "accel": "NVIDIA GeForce GTX 1060 6GB", "model": "Llama 3.2 1B Instruct"}, {"bw": 192.0, "tps": 14.67, "params_b": 7.27, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Falcon3 Mamba 7B Instruct"}, {"bw": 192.0, "tps": 99.46, "params_b": 1.1, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "tinyllama_tinyllama-1.1b-chat-v1.0"}, {"bw": 736.0, "tps": 39.14, "params_b": 12.77, "accel": "NVIDIA GeForce RTX 4080 SUPER", "model": "Gemma-3-12B-It"}, {"bw": 200.0, "tps": 91.45, "params_b": 1.5, "accel": "Apple M2 Pro 6P+4E+16GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 120.0, "tps": 77.72, "params_b": 1.5, "accel": "Apple M4 4P+6E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 400.0, "tps": 11.74, "params_b": 14.77, "accel": "Apple M1 Max 8P+2E+24GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 896.0, "tps": 106.6, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5070 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 336.0, "tps": 39.75, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3060 Laptop GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 336.0, "tps": 147.64, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3060 Laptop GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 34.2, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 5060 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 448.0, "tps": 61.94, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 5060 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 448.0, "tps": 225.52, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5060 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 200.0, "tps": 23.81, "params_b": 8.03, "accel": "Apple M2 Pro 6P+4E+16GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 400.0, "tps": 44.66, "params_b": 8.03, "accel": "Apple M2 Max 8P+4E+38GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 400.0, "tps": 152.98, "params_b": 1.5, "accel": "Apple M2 Max 8P+4E+38GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 400.0, "tps": 20.59, "params_b": 14.77, "accel": "Apple M2 Max 8P+4E+38GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 192.0, "tps": 74.87, "params_b": 1.5, "accel": "NVIDIA GeForce GTX 1060 6GB", "model": "Llama 3.2 1B Instruct"}, {"bw": 120.0, "tps": 6.5, "params_b": 14.77, "accel": "Apple M4 4P+6E+10GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 68.0, "tps": 38.87, "params_b": 1.5, "accel": "Apple M1 4P+4E+7GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 68.0, "tps": 38.39, "params_b": 1.5, "accel": "Apple M1 4P+4E+7GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 273.0, "tps": 32.09, "params_b": 30.53, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Qwen3-Coder-30B-A3B-Instruct"}, {"bw": 448.0, "tps": 212.53, "params_b": 1.5, "accel": "NVIDIA RTX A4000", "model": "Llama 3.2 1B Instruct"}, {"bw": 224.0, "tps": 37.89, "params_b": 8.03, "accel": "NVIDIA RTX 2000 Ada Generation", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 224.0, "tps": 35.85, "params_b": 8.04, "accel": "NVIDIA RTX 2000 Ada Generation", "model": "YandexGPT 5 Lite 8B Instruct"}, {"bw": 224.0, "tps": 175.36, "params_b": 1.1, "accel": "NVIDIA RTX 2000 Ada Generation", "model": ".."}, {"bw": 640.0, "tps": 7.94, "params_b": 14.77, "accel": "NVIDIA RTX A4500 Laptop GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 640.0, "tps": 72.52, "params_b": 1.5, "accel": "NVIDIA RTX A4500 Laptop GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 400.0, "tps": 24.54, "params_b": 8.19, "accel": "Apple M1 Max 8P+2E+24GPU", "model": "Deepseek-R1-0528-Qwen3-8B"}, {"bw": 400.0, "tps": 37.09, "params_b": 3.82, "accel": "Apple M1 Max 8P+2E+24GPU", "model": "Phi 3 Mini 128k Instruct"}, {"bw": 546.0, "tps": 182.78, "params_b": 1.5, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 360.0, "tps": 210.71, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3060", "model": "Llama 3.2 1B Instruct"}, {"bw": 546.0, "tps": 189.79, "params_b": 1.5, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 546.0, "tps": 55.49, "params_b": 8.03, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 546.0, "tps": 179.05, "params_b": 1.5, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 112.0, "tps": 13.28, "params_b": 1.5, "accel": "NVIDIA GeForce GTX 1050", "model": "Llama 3.2 1B Instruct"}, {"bw": 546.0, "tps": 62.5, "params_b": 6.74, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "LLaMA v2"}, {"bw": 200.0, "tps": 25.04, "params_b": 8.03, "accel": "Apple M2 Pro 6P+4E+16GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 200.0, "tps": 90.82, "params_b": 1.5, "accel": "Apple M2 Pro 6P+4E+16GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 33.97, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 5060 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 448.0, "tps": 33.62, "params_b": 14.77, "accel": "NVIDIA RTX A4000", "model": "Qwen2.5 14B Instruct"}, {"bw": 448.0, "tps": 61.49, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 5060 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 448.0, "tps": 60.02, "params_b": 8.03, "accel": "NVIDIA RTX A4000", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 448.0, "tps": 220.27, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5060 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 232.64, "params_b": 1.5, "accel": "NVIDIA RTX A4000", "model": "Llama 3.2 1B Instruct"}, {"bw": 1792.0, "tps": 79.73, "params_b": 14.77, "accel": "NVIDIA RTX PRO 6000 Blackwell Workstation Editio", "model": "Qwen2.5 14B Instruct"}, {"bw": 288.0, "tps": 24.32, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4060 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 800.0, "tps": 36.63, "params_b": 14.77, "accel": "Apple M2 Ultra 16P+8E+76GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 360.0, "tps": 197.47, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3060", "model": "Llama 3.2 1B Instruct"}, {"bw": 200.0, "tps": 13.34, "params_b": 14.77, "accel": "Apple M2 Pro 6P+4E+16GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 360.0, "tps": 26.9, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3060", "model": "Qwen2.5 14B Instruct"}, {"bw": 936.0, "tps": 65.18, "params_b": 4.41, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen3 4B"}, {"bw": 936.0, "tps": 176.3, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3090", "model": "Llama 3.2 1B Instruct"}, {"bw": 936.0, "tps": 66.12, "params_b": 4.41, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen3 4B"}, {"bw": 936.0, "tps": 151.06, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3090", "model": "Llama 3.2 1B Instruct"}, {"bw": 936.0, "tps": 59.7, "params_b": 8.19, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen3 8B"}, {"bw": 936.0, "tps": 39.26, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen3 14B"}, {"bw": 400.0, "tps": 37.94, "params_b": 8.03, "accel": "Apple M1 Max 8P+2E+32GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 336.0, "tps": 167.61, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 2060", "model": "Llama 3.2 1B Instruct"}, {"bw": 546.0, "tps": 28.04, "params_b": 14.77, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 546.0, "tps": 54.19, "params_b": 8.03, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 546.0, "tps": 179.97, "params_b": 1.5, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 200.0, "tps": 103.87, "params_b": 1.5, "accel": "Apple M2 Pro 8P+4E+19GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 1792.0, "tps": 45.75, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 5090", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 504.0, "tps": 136.77, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4070 SUPER", "model": "Llama 3.2 1B Instruct"}, {"bw": 273.0, "tps": 32.74, "params_b": 8.03, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 273.0, "tps": 122.38, "params_b": 1.5, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 273.0, "tps": 18.17, "params_b": 14.77, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 150.0, "tps": 11.9, "params_b": 14.77, "accel": "Apple M3 Pro 5P+6E+14GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 150.0, "tps": 89.92, "params_b": 1.5, "accel": "Apple M3 Pro 6P+6E+18GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 33.75, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 5060 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 448.0, "tps": 32.87, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 5060 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 120.0, "tps": 75.21, "params_b": 1.5, "accel": "Apple M4 4P+6E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 100.0, "tps": 52.89, "params_b": 1.5, "accel": "Apple M2 4P+4E+8GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 100.0, "tps": 18.81, "params_b": 8.03, "accel": "Apple M2 4P+4E+8GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 100.0, "tps": 61.58, "params_b": 1.5, "accel": "Apple M3 4P+4E", "model": "Llama 3.2 1B Instruct"}, {"bw": 100.0, "tps": 65.76, "params_b": 1.5, "accel": "Apple M3 4P+4E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 936.0, "tps": 330.61, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3090", "model": "Llama 3.2 1B Instruct"}, {"bw": 400.0, "tps": 125.93, "params_b": 1.5, "accel": "Apple M1 Max 8P+2E+32GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 546.0, "tps": 181.39, "params_b": 1.5, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 1008.0, "tps": 372.07, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3090 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 300.0, "tps": 184.93, "params_b": 1.5, "accel": "NVIDIA L4", "model": "Llama 3.2 1B Instruct"}, {"bw": 300.0, "tps": 189.13, "params_b": 1.5, "accel": "NVIDIA L4", "model": "Llama 3.2 1B Instruct"}, {"bw": 1792.0, "tps": 41.35, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5090", "model": "Llama 3.2 1B Instruct"}, {"bw": 1792.0, "tps": 41.51, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5090", "model": "Llama 3.2 1B Instruct"}, {"bw": 960.0, "tps": 40.97, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5080", "model": "Llama 3.2 1B Instruct"}, {"bw": 960.0, "tps": 41.18, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5080", "model": "Llama 3.2 1B Instruct"}, {"bw": 960.0, "tps": 41.72, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5080", "model": "Llama 3.2 1B Instruct"}, {"bw": 960.0, "tps": 41.07, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5080", "model": "Llama 3.2 1B Instruct"}, {"bw": 400.0, "tps": 125.85, "params_b": 1.5, "accel": "Apple M1 Max 8P+2E+32GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 400.0, "tps": 149.52, "params_b": 1.5, "accel": "Apple M3 Max 12P+4E+40GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 936.0, "tps": 58.35, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen2.5 14B Instruct"}, {"bw": 936.0, "tps": 40.94, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen2.5 14B Instruct"}, {"bw": 400.0, "tps": 19.58, "params_b": 14.77, "accel": "Apple M1 Max 8P+2E+32GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 512.0, "tps": 40.25, "params_b": 8.03, "accel": "AMD Radeon RX 6900 XT", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 288.0, "tps": 14.86, "params_b": 14.77, "accel": "Quadro P5000", "model": "Qwen2.5 14B Instruct"}, {"bw": 100.0, "tps": 7.93, "params_b": 8.03, "accel": "Apple M3 4P+4E+10GPU", "model": "Models Mlx Community Meta Llama 3.1 8B Instruct Bf16"}, {"bw": 100.0, "tps": 9.69, "params_b": 8.03, "accel": "Apple M3 4P+4E+10GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 448.0, "tps": 34.32, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 5060 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 200.0, "tps": 78.71, "params_b": 1.5, "accel": "Apple M1 Pro 8P+2E+16GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 273.0, "tps": 18.05, "params_b": 14.77, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 200.0, "tps": 20.45, "params_b": 8.03, "accel": "Apple M1 Pro 6P+2E+14GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 150.0, "tps": 12.0, "params_b": 14.77, "accel": "Apple M3 Pro 5P+6E+14GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 200.0, "tps": 11.02, "params_b": 14.77, "accel": "Apple M1 Pro 6P+2E+14GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 360.0, "tps": 28.62, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3060", "model": "Qwen2.5 14B Instruct"}, {"bw": 936.0, "tps": 50.73, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen2.5 14B Instruct"}, {"bw": 936.0, "tps": 193.91, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3090", "model": "Llama 3.2 1B Instruct"}, {"bw": 936.0, "tps": 85.43, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3090", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 936.0, "tps": 83.08, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3090", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 936.0, "tps": 192.82, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3090", "model": "Llama 3.2 1B Instruct"}, {"bw": 272.0, "tps": 41.21, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4060", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 800.0, "tps": 33.33, "params_b": 14.77, "accel": "Apple M1 Ultra 16P+4E+64GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 400.0, "tps": 19.01, "params_b": 14.77, "accel": "Apple M3 Max 10P+4E+30GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 120.0, "tps": 16.56, "params_b": 8.03, "accel": "Apple M4 4P+6E+10GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 400.0, "tps": 37.68, "params_b": 8.03, "accel": "Apple M3 Max 10P+4E+30GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 760.0, "tps": 78.27, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3080", "model": "Llama 3 8b Bnb 4bit"}, {"bw": 760.0, "tps": 41.05, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3080", "model": "Qwen2.5 14B Instruct"}, {"bw": 760.0, "tps": 75.74, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3080", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 760.0, "tps": 205.49, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3080", "model": "Llama 3.2 1B Instruct"}, {"bw": 400.0, "tps": 129.46, "params_b": 1.5, "accel": "Apple M2 Max 8P+4E+30GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 224.0, "tps": 72.91, "params_b": 1.5, "accel": "AMD Radeon RX 6600", "model": "Llama 3.2 1B Instruct"}, {"bw": 68.0, "tps": 39.66, "params_b": 1.5, "accel": "Apple M1 4P+4E+8GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 180.34, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3070", "model": "Llama 3.2 1B Instruct"}, {"bw": 89.0, "tps": 11.79, "params_b": 1.5, "accel": "AMD Radeon 780M Graphics", "model": "Llama 3.2 1B Instruct"}, {"bw": 150.0, "tps": 88.11, "params_b": 1.5, "accel": "Apple M3 Pro 5P+6E+14GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 336.0, "tps": 86.42, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3060 Laptop GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 288.0, "tps": 19.72, "params_b": 14.77, "accel": "NVIDIA RTX A2000 12GB", "model": "Qwen2.5 14B Instruct"}, {"bw": 200.0, "tps": 11.16, "params_b": 14.77, "accel": "Apple M1 Pro 8P+2E+16GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 200.0, "tps": 26.34, "params_b": 8.03, "accel": "Apple M2 Pro 8P+4E+19GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 484.0, "tps": 20.15, "params_b": 14.77, "accel": "NVIDIA GeForce GTX 1080 Ti", "model": "DeepSeek R1 Distill Qwen 14B"}, {"bw": 484.0, "tps": 21.07, "params_b": 14.77, "accel": "NVIDIA GeForce GTX 1080 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 484.0, "tps": 20.17, "params_b": 14.77, "accel": "NVIDIA GeForce GTX 1080 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 68.0, "tps": 5.43, "params_b": 14.77, "accel": "Apple M1 4P+4E+8GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 484.0, "tps": 127.59, "params_b": 1.3, "accel": "NVIDIA GeForce GTX 1080 Ti", "model": "Gemma 3 1b It"}, {"bw": 288.0, "tps": 218.91, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4060 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 484.0, "tps": 57.33, "params_b": 4.55, "accel": "NVIDIA GeForce GTX 1080 Ti", "model": "Gemma 3 4b It"}, {"bw": 200.0, "tps": 99.29, "params_b": 1.5, "accel": "Apple M2 Pro 8P+4E+19GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 200.0, "tps": 26.28, "params_b": 8.03, "accel": "Apple M2 Pro 8P+4E+19GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 819.0, "tps": 35.87, "params_b": 14.77, "accel": "Apple M3 Ultra 24P+8E+80GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 288.0, "tps": 57.09, "params_b": 1.5, "accel": "AMD Radeon RX 7600 XT", "model": "Llama 3.2 1B Instruct"}, {"bw": 484.0, "tps": 57.39, "params_b": 4.55, "accel": "NVIDIA GeForce GTX 1080 Ti", "model": "Gemma 3 4b It"}, {"bw": 484.0, "tps": 128.67, "params_b": 1.3, "accel": "NVIDIA GeForce GTX 1080 Ti", "model": "Gemma 3 1b It"}, {"bw": 200.0, "tps": 21.23, "params_b": 8.03, "accel": "Apple M1 Pro 8P+2E+16GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 200.0, "tps": 79.1, "params_b": 1.5, "accel": "Apple M1 Pro 8P+2E+16GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 360.0, "tps": 27.14, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3060", "model": "Qwen2.5 14B Instruct"}, {"bw": 448.0, "tps": 65.14, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3060 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 400.0, "tps": 125.96, "params_b": 1.5, "accel": "Apple M1 Max 8P+2E+32GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 512.0, "tps": 22.16, "params_b": 14.77, "accel": "AMD Radeon RX 6900 XT", "model": "Qwen2.5 14B Instruct"}, {"bw": 484.0, "tps": 19.64, "params_b": 14.77, "accel": "NVIDIA GeForce GTX 1080 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 484.0, "tps": 145.63, "params_b": 1.5, "accel": "NVIDIA GeForce GTX 1080 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 100.0, "tps": 63.72, "params_b": 1.5, "accel": "Apple M3 4P+4E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 273.0, "tps": 23.04, "params_b": 8.37, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Granite 3.3 8b Instruct"}, {"bw": 512.0, "tps": 24.21, "params_b": 14.77, "accel": "AMD Radeon RX 6900 XT", "model": "Qwen2.5 14B Instruct"}, {"bw": 768.0, "tps": 28.45, "params_b": 14.77, "accel": "NVIDIA RTX A5000 Laptop GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 936.0, "tps": 59.07, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen2.5 14B Instruct"}, {"bw": 1008.0, "tps": 25.76, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4090", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 1008.0, "tps": 43.43, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4090", "model": "Llama 3.2 1B Instruct"}, {"bw": 1008.0, "tps": 13.12, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4090", "model": "Qwen2.5 14B Instruct"}, {"bw": 112.0, "tps": 44.55, "params_b": 1.5, "accel": "NVIDIA GeForce GTX 1050 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 546.0, "tps": 183.94, "params_b": 1.5, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 200.0, "tps": 11.58, "params_b": 14.77, "accel": "Apple M1 Pro 8P+2E+16GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 624.0, "tps": 38.89, "params_b": 8.03, "accel": "AMD Radeon RX 7800 XT", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 624.0, "tps": 120.68, "params_b": 1.5, "accel": "AMD Radeon RX 7800 XT", "model": "Llama 3.2 1B Instruct"}, {"bw": 624.0, "tps": 22.99, "params_b": 14.77, "accel": "AMD Radeon RX 7800 XT", "model": "Qwen2.5 14B Instruct"}, {"bw": 960.0, "tps": 45.33, "params_b": 8.03, "accel": "Radeon RX 7900 XTX", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 960.0, "tps": 27.95, "params_b": 14.77, "accel": "Radeon RX 7900 XTX", "model": "Qwen2.5 14B Instruct"}, {"bw": 960.0, "tps": 133.02, "params_b": 1.5, "accel": "Radeon RX 7900 XTX", "model": "Llama 3.2 1B Instruct"}, {"bw": 936.0, "tps": 52.25, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen2.5 14B Instruct"}, {"bw": 936.0, "tps": 318.78, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3090", "model": "Llama 3.2 1B Instruct"}, {"bw": 100.0, "tps": 6.09, "params_b": 14.77, "accel": "Apple M3 4P+4E+10GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 224.0, "tps": 26.01, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3050", "model": "DeepSeek R1 Distill Llama 8B"}, {"bw": 224.0, "tps": 27.31, "params_b": 7.62, "accel": "NVIDIA GeForce RTX 3050", "model": "DeepSeek R1 Distill Qwen 7B"}, {"bw": 224.0, "tps": 60.38, "params_b": 1.78, "accel": "NVIDIA GeForce RTX 3050", "model": "DeepSeek R1 Distill Qwen 1.5B"}, {"bw": 224.0, "tps": 23.54, "params_b": 8.37, "accel": "NVIDIA GeForce RTX 3050", "model": "Granite 3.3 8b Instruct"}, {"bw": 936.0, "tps": 54.67, "params_b": 10.16, "accel": "NVIDIA GeForce RTX 3090", "model": "Gemma Ifable"}, {"bw": 936.0, "tps": 25.09, "params_b": 28.42, "accel": "NVIDIA GeForce RTX 3090", "model": "Fallen Gemma3 27B v1"}, {"bw": 936.0, "tps": 77.64, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3090", "model": "Meta Llama 3.1 8B Instruct Abliterated"}, {"bw": 224.0, "tps": 98.01, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3050", "model": "Llama 3.2 1B Instruct"}, {"bw": 224.0, "tps": 26.0, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3050", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 224.0, "tps": 36.96, "params_b": 4.55, "accel": "NVIDIA GeForce RTX 3050", "model": "Gemma 3 4b It"}, {"bw": 288.0, "tps": 105.18, "params_b": 1.5, "accel": "NVIDIA RTX A2000 Laptop GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 288.0, "tps": 105.7, "params_b": 1.5, "accel": "NVIDIA RTX A2000 Laptop GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 100.0, "tps": 43.08, "params_b": 1.5, "accel": "Apple M2 4P+4E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 1008.0, "tps": 359.17, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4090 D", "model": "Llama 3.2 1B Instruct"}, {"bw": 1008.0, "tps": 70.46, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4090 D", "model": "Qwen2.5 14B Instruct"}, {"bw": 384.0, "tps": 102.79, "params_b": 1.5, "accel": "AMD Radeon RX 6700 XT", "model": "Llama 3.2 1B Instruct"}, {"bw": 200.0, "tps": 10.78, "params_b": 14.77, "accel": "Apple M1 Pro 6P+2E+14GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 120.0, "tps": 15.89, "params_b": 8.03, "accel": "Apple M4 4P+6E+10GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 120.0, "tps": 8.12, "params_b": 14.77, "accel": "Apple M4 4P+6E+10GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 192.0, "tps": 141.8, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3050 Ti Laptop GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 400.0, "tps": 17.23, "params_b": 14.77, "accel": "Apple M1 Max 8P+2E+32GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 68.0, "tps": 38.58, "params_b": 1.5, "accel": "Apple M1 4P+4E+7GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 384.0, "tps": 15.71, "params_b": 14.77, "accel": "AMD Radeon RX 6700 XT", "model": "Qwen2.5 14B Instruct"}, {"bw": 400.0, "tps": 20.53, "params_b": 14.77, "accel": "Apple M3 Max 10P+4E+30GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 273.0, "tps": 110.67, "params_b": 1.5, "accel": "Apple M4 Pro 8P+4E+16GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 273.0, "tps": 30.66, "params_b": 8.03, "accel": "Apple M4 Pro 8P+4E+16GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 273.0, "tps": 16.57, "params_b": 14.77, "accel": "Apple M4 Pro 8P+4E+16GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 256.0, "tps": 24.28, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4060 Laptop GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 1792.0, "tps": 81.06, "params_b": 14.77, "accel": "NVIDIA RTX PRO 6000 Blackwell Workstation Editio", "model": "Qwen2.5 14B Instruct"}, {"bw": 1792.0, "tps": 138.18, "params_b": 8.03, "accel": "NVIDIA RTX PRO 6000 Blackwell Workstation Editio", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 1792.0, "tps": 374.26, "params_b": 1.5, "accel": "NVIDIA RTX PRO 6000 Blackwell Workstation Editio", "model": "Llama 3.2 1B Instruct"}, {"bw": 100.0, "tps": 59.51, "params_b": 1.5, "accel": "Apple M3 4P+4E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 100.0, "tps": 65.61, "params_b": 1.5, "accel": "Apple M3 4P+4E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 546.0, "tps": 178.63, "params_b": 1.5, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 546.0, "tps": 26.48, "params_b": 14.77, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 200.0, "tps": 14.12, "params_b": 14.77, "accel": "Apple M2 Pro 8P+4E+19GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 200.0, "tps": 11.91, "params_b": 14.77, "accel": "Apple M1 Pro 8P+2E+16GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 200.0, "tps": 99.62, "params_b": 1.5, "accel": "Apple M2 Pro 8P+4E+19GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 960.0, "tps": 16.61, "params_b": 1.5, "accel": "Radeon RX 7900 XTX", "model": "Llama-3.2-1B-Instruct"}, {"bw": 624.0, "tps": 13.92, "params_b": 1.5, "accel": "AMD Radeon RX 7800 XT", "model": "Llama-3.2-1B-Instruct"}, {"bw": 960.0, "tps": 20.53, "params_b": 4.55, "accel": "Radeon RX 7900 XTX", "model": "Gemma-3-4B-It-Qat"}, {"bw": 100.0, "tps": 48.71, "params_b": 14.77, "accel": "Tesla V100-SXM2-32GB", "model": "Qwen2.5 14B Instruct"}, {"bw": 100.0, "tps": 248.98, "params_b": 1.5, "accel": "Tesla V100-SXM2-32GB", "model": "Llama 3.2 1B Instruct"}, {"bw": 384.0, "tps": 5.4, "params_b": 8.03, "accel": "AMD Radeon RX 6700 XT", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 384.0, "tps": 12.85, "params_b": 1.5, "accel": "AMD Radeon RX 6700 XT", "model": "Llama 3.2 1B Instruct"}, {"bw": 936.0, "tps": 301.17, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3090", "model": "Llama 3.2 1B Instruct"}, {"bw": 400.0, "tps": 18.05, "params_b": 14.77, "accel": "Apple M1 Max 8P+2E+32GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 400.0, "tps": 33.65, "params_b": 8.03, "accel": "Apple M1 Max 8P+2E+32GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 400.0, "tps": 124.48, "params_b": 1.5, "accel": "Apple M1 Max 8P+2E+32GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 28.4, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3080 Laptop GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 448.0, "tps": 27.43, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3080 Laptop GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 400.0, "tps": 20.5, "params_b": 14.77, "accel": "Apple M1 Max 8P+2E+32GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 192.0, "tps": 21.02, "params_b": 1.5, "accel": "NVIDIA T1200 Laptop GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 200.0, "tps": 69.18, "params_b": 1.5, "accel": "Apple M1 Pro 6P+2E+14GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 624.0, "tps": 23.04, "params_b": 14.77, "accel": "AMD Radeon RX 7800 XT", "model": "Qwen2.5 14B Instruct"}, {"bw": 512.0, "tps": 19.97, "params_b": 14.77, "accel": "AMD Radeon RX 6800 XT", "model": "Qwen2.5 14B Instruct"}, {"bw": 960.0, "tps": 16.37, "params_b": 1.5, "accel": "Radeon RX 7900 XTX", "model": "Llama 3.2 1B Instruct"}, {"bw": 273.0, "tps": 32.2, "params_b": 30.53, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Qwen3-30B-A3B"}, {"bw": 936.0, "tps": 320.92, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3090", "model": "Llama 3.2 1B Instruct"}, {"bw": 273.0, "tps": 18.07, "params_b": 14.77, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 68.0, "tps": 47.52, "params_b": 0.75, "accel": "Apple M1 4P+4E+8GPU", "model": "Qwen3 0.6B"}, {"bw": 336.0, "tps": 101.13, "params_b": 1.1, "accel": "NVIDIA GeForce RTX 3060 Laptop GPU", "model": "active"}, {"bw": 336.0, "tps": 126.04, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3060 Laptop GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 336.0, "tps": 73.81, "params_b": 1.3, "accel": "NVIDIA GeForce RTX 3060 Laptop GPU", "model": "Gemma 3 1b It"}, {"bw": 336.0, "tps": 21.04, "params_b": 1.3, "accel": "NVIDIA GeForce RTX 3060 Laptop GPU", "model": "Gemma 3 1b It"}, {"bw": 336.0, "tps": 22.08, "params_b": 1.3, "accel": "NVIDIA GeForce RTX 3060 Laptop GPU", "model": "Gemma 3 1b It"}, {"bw": 200.0, "tps": 82.53, "params_b": 0.75, "accel": "Apple M1 Pro 8P+2E+16GPU", "model": "Qwen3 0.6B Instruct"}, {"bw": 936.0, "tps": 43.21, "params_b": 30.53, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen3 30B A3B"}, {"bw": 936.0, "tps": 116.8, "params_b": 4.41, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen3-4B"}, {"bw": 150.0, "tps": 12.2, "params_b": 14.77, "accel": "Apple M3 Pro 5P+6E+14GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 336.0, "tps": 110.76, "params_b": 1.1, "accel": "NVIDIA GeForce RTX 3060 Laptop GPU", "model": "active"}, {"bw": 336.0, "tps": 134.68, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3060 Laptop GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 150.0, "tps": 21.46, "params_b": 8.03, "accel": "Apple M3 Pro 5P+6E+14GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 150.0, "tps": 88.26, "params_b": 1.5, "accel": "Apple M3 Pro 5P+6E+14GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 150.0, "tps": 88.22, "params_b": 1.5, "accel": "Apple M3 Pro 5P+6E+14GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 236.18, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5060 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 60.58, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 5060 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 336.0, "tps": 135.75, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3060 Laptop GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 112.0, "tps": 50.64, "params_b": 0.63, "accel": "NVIDIA T550 Laptop GPU", "model": "Qwen2.5 Coder 0.5B Instruct"}, {"bw": 273.0, "tps": 123.35, "params_b": 1.5, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 100.0, "tps": 53.86, "params_b": 1.5, "accel": "Apple M2 4P+4E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 120.0, "tps": 74.5, "params_b": 1.5, "accel": "Apple M4 4P+6E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 100.0, "tps": 57.01, "params_b": 1.5, "accel": "Apple M2 4P+4E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 100.0, "tps": 56.46, "params_b": 1.5, "accel": "Apple M2 4P+4E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 27.38, "params_b": 1.38, "accel": "AMD Radeon RX 5700 XT", "model": "Granite 3.1 1b A400M Instruct"}, {"bw": 288.0, "tps": 26.94, "params_b": 12.77, "accel": "NVIDIA GeForce RTX 4060 Ti", "model": "Amoral Gemma3 12B"}, {"bw": 288.0, "tps": 32.37, "params_b": 10.16, "accel": "NVIDIA GeForce RTX 4060 Ti", "model": "gemma-2-9b-it"}, {"bw": 288.0, "tps": 62.96, "params_b": 4.55, "accel": "NVIDIA GeForce RTX 4060 Ti", "model": "Gemma 3 4b It Qat"}, {"bw": 288.0, "tps": 24.52, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4060 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 288.0, "tps": 44.76, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4060 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 288.0, "tps": 170.77, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4060 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 504.0, "tps": 10.9, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4070 SUPER", "model": "Qwen2.5 14B Instruct"}, {"bw": 672.0, "tps": 21.97, "params_b": 28.42, "accel": "Quadro RTX 6000", "model": "Gemma 3 27b"}, {"bw": 273.0, "tps": 6.21, "params_b": 32.76, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "DeepSeek R1 Distill Qwen 32B"}, {"bw": 672.0, "tps": 40.06, "params_b": 12.77, "accel": "Quadro RTX 6000", "model": "Gemma 3 12b"}, {"bw": 672.0, "tps": 104.67, "params_b": 2.67, "accel": "Quadro RTX 6000", "model": "EXAONE-Deep-2.4B"}, {"bw": 400.0, "tps": 17.47, "params_b": 14.77, "accel": "Apple M1 Max 8P+2E+24GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 400.0, "tps": 32.12, "params_b": 8.03, "accel": "Apple M1 Max 8P+2E+24GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 400.0, "tps": 106.11, "params_b": 1.5, "accel": "Apple M1 Max 8P+2E+24GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 360.0, "tps": 30.42, "params_b": 12.77, "accel": "NVIDIA GeForce RTX 3060", "model": "Gemma 3 12b It Qat"}, {"bw": 360.0, "tps": 65.06, "params_b": 4.55, "accel": "NVIDIA GeForce RTX 3060", "model": "Gemma 3 4b It Qat"}, {"bw": 360.0, "tps": 29.48, "params_b": 12.77, "accel": "NVIDIA GeForce RTX 3060", "model": "Amoral Gemma3 12B"}, {"bw": 360.0, "tps": 36.4, "params_b": 10.16, "accel": "NVIDIA GeForce RTX 3060", "model": "gemma-2-9b-it"}, {"bw": 360.0, "tps": 28.06, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3060", "model": "Qwen2.5 14B Instruct"}, {"bw": 360.0, "tps": 49.48, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3060", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 360.0, "tps": 168.95, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3060", "model": "Llama 3.2 1B Instruct"}, {"bw": 273.0, "tps": 17.11, "params_b": 14.77, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 200.0, "tps": 11.86, "params_b": 14.77, "accel": "Apple M1 Pro 8P+2E+16GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 273.0, "tps": 32.08, "params_b": 8.03, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 273.0, "tps": 117.53, "params_b": 1.5, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 200.0, "tps": 21.98, "params_b": 8.03, "accel": "Apple M1 Pro 8P+2E+16GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 200.0, "tps": 78.08, "params_b": 1.5, "accel": "Apple M1 Pro 8P+2E+16GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 672.0, "tps": 81.31, "params_b": 4.55, "accel": "Quadro RTX 6000", "model": "Gemma 3 4b It Abliterated"}, {"bw": 672.0, "tps": 39.73, "params_b": 12.77, "accel": "Quadro RTX 6000", "model": "Gemma 3 12b"}, {"bw": 400.0, "tps": 37.28, "params_b": 8.03, "accel": "Apple M3 Max 10P+4E+30GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 448.0, "tps": 25.35, "params_b": 8.03, "accel": "AMD Radeon RX 5700 XT", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 360.0, "tps": 49.7, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3060", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 400.0, "tps": 18.46, "params_b": 14.77, "accel": "Apple M1 Max 8P+2E+32GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 360.0, "tps": 200.82, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3060", "model": "Llama 3.2 1B Instruct"}, {"bw": 360.0, "tps": 27.6, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3060", "model": "Qwen2.5 14B Instruct"}, {"bw": 400.0, "tps": 20.66, "params_b": 14.77, "accel": "Apple M3 Max 10P+4E+30GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 400.0, "tps": 20.87, "params_b": 14.77, "accel": "Apple M3 Max 10P+4E+30GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 400.0, "tps": 37.74, "params_b": 8.03, "accel": "Apple M3 Max 10P+4E+30GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 400.0, "tps": 132.87, "params_b": 1.5, "accel": "Apple M3 Max 10P+4E+30GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 546.0, "tps": 27.26, "params_b": 14.77, "accel": "Apple M4 Max 10P+4E+32GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 616.0, "tps": 68.32, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 2080 Ti", "model": "DeepSeek R1 Distill Llama 8B"}, {"bw": 120.0, "tps": 72.52, "params_b": 1.5, "accel": "Apple M4 4P+6E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 120.0, "tps": 8.01, "params_b": 14.77, "accel": "Apple M4 4P+6E+10GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 273.0, "tps": 17.97, "params_b": 14.77, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 546.0, "tps": 180.7, "params_b": 1.5, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 100.0, "tps": 250.3, "params_b": 1.5, "accel": "Tesla V100-SXM2-16GB", "model": "Llama 3.2 1B Instruct"}, {"bw": 288.0, "tps": 20.11, "params_b": 3.4, "accel": "NVIDIA GeForce GTX 1660 Ti", "model": "qwen2.5-3b-instruct"}, {"bw": 300.0, "tps": 13.32, "params_b": 14.77, "accel": "Tesla T4", "model": "Qwen2.5 14B Instruct"}, {"bw": 336.0, "tps": 34.75, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3060 Laptop GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 336.0, "tps": 30.0, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3060 Laptop GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 288.0, "tps": 562.73, "params_b": 0.0, "accel": "NVIDIA GeForce GTX 1660 Ti", "model": "Raincandy U TinyStories 656K"}, {"bw": 760.0, "tps": 76.6, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3080", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 760.0, "tps": 184.59, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3080", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 262.73, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3070", "model": "Llama 3.2 1B Instruct"}, {"bw": 960.0, "tps": 23.7, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 5080", "model": "Qwen2.5 14B Instruct"}, {"bw": 960.0, "tps": 103.49, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5080", "model": "Llama 3.2 1B Instruct"}, {"bw": 960.0, "tps": 23.93, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 5080", "model": "Qwen2.5 14B Instruct"}, {"bw": 400.0, "tps": 146.34, "params_b": 1.5, "accel": "Apple M3 Max 12P+4E+40GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 400.0, "tps": 45.57, "params_b": 8.03, "accel": "Apple M3 Max 12P+4E+40GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 288.0, "tps": 21.94, "params_b": 10.16, "accel": "NVIDIA GeForce RTX 4060 Ti", "model": "gemma2"}, {"bw": 89.0, "tps": 2.56, "params_b": 12.77, "accel": "AMD Radeon 780M", "model": "Gemma 3 12b It"}, {"bw": 360.0, "tps": 13.37, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3060", "model": "Qwen2.5 14B Instruct"}, {"bw": 360.0, "tps": 23.2, "params_b": 4.55, "accel": "NVIDIA GeForce RTX 3060", "model": "Gemma 3 4b It"}, {"bw": 256.0, "tps": 24.39, "params_b": 1.5, "accel": "NVIDIA GeForce GTX 1070 with Max-Q Design", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 24.91, "params_b": 7.62, "accel": "NVIDIA GeForce RTX 3070 Ti Laptop GPU", "model": "Dir"}, {"bw": 624.0, "tps": 6.48, "params_b": 12.77, "accel": "AMD Radeon RX 7800 XT", "model": "Gemma 3 12b It"}, {"bw": 224.0, "tps": 27.57, "params_b": 8.03, "accel": "NVIDIA RTX A2000 8GB Laptop GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 224.0, "tps": 27.47, "params_b": 8.03, "accel": "NVIDIA RTX A2000 8GB Laptop GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 224.0, "tps": 108.9, "params_b": 1.5, "accel": "NVIDIA RTX A2000 8GB Laptop GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 192.0, "tps": 8.98, "params_b": 8.03, "accel": "NVIDIA GeForce GTX 1660", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 224.0, "tps": 167.57, "params_b": 1.5, "accel": "NVIDIA RTX 2000 Ada Generation", "model": "Llama 3.2 1B Instruct"}, {"bw": 224.0, "tps": 20.3, "params_b": 14.77, "accel": "NVIDIA RTX 2000 Ada Generation", "model": "Qwen2.5 14B Instruct"}, {"bw": 224.0, "tps": 165.0, "params_b": 1.5, "accel": "NVIDIA RTX 2000 Ada Generation", "model": "Llama 3.2 1B Instruct"}, {"bw": 224.0, "tps": 36.82, "params_b": 8.03, "accel": "NVIDIA RTX 2000 Ada Generation", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 1008.0, "tps": 41.29, "params_b": 3.82, "accel": "NVIDIA GeForce RTX 4090", "model": "Phi3"}, {"bw": 936.0, "tps": 339.18, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3090", "model": "Llama 3.2 1B Instruct"}, {"bw": 936.0, "tps": 61.77, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen2.5 14B Instruct"}, {"bw": 936.0, "tps": 58.95, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen2.5 14B Instruct"}, {"bw": 936.0, "tps": 61.97, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen2.5 14B Instruct"}, {"bw": 936.0, "tps": 60.98, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen2.5 14B Instruct"}, {"bw": 120.0, "tps": 75.88, "params_b": 1.5, "accel": "Apple M4 4P+6E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 936.0, "tps": 145.39, "params_b": 4.45, "accel": "NVIDIA GeForce RTX 3090", "model": "Phi 4 Mini Instruct"}, {"bw": 512.0, "tps": 2.49, "params_b": 14.77, "accel": "AMD Radeon RX 6800", "model": "Qwen2.5 14B Instruct"}, {"bw": 512.0, "tps": 9.97, "params_b": 1.5, "accel": "AMD Radeon RX 6800", "model": "Llama 3.2 1B Instruct"}, {"bw": 936.0, "tps": 186.29, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3090", "model": "Llama 3.2 1B Instruct"}, {"bw": 504.0, "tps": 10.79, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4070", "model": "Cogito v1 Preview Qwen 14B"}, {"bw": 504.0, "tps": 36.7, "params_b": 12.77, "accel": "NVIDIA GeForce RTX 4070", "model": "Gemma 3 12b It"}, {"bw": 400.0, "tps": 7.18, "params_b": 23.57, "accel": "Apple M1 Max 8P+2E+24GPU", "model": "Mistral Small 3.1 24B Instruct 2503"}, {"bw": 546.0, "tps": 22.25, "params_b": 14.77, "accel": "Apple M4 Max 10P+4E+32GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 1008.0, "tps": 31.68, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4090", "model": "Qwen2.5 14B Instruct"}, {"bw": 960.0, "tps": 415.8, "params_b": 1.5, "accel": "NVIDIA RTX 6000 Ada Generation", "model": "Llama 3.2 1B Instruct"}, {"bw": 616.0, "tps": 57.07, "params_b": 7.62, "accel": "NVIDIA GeForce RTX 2080 Ti", "model": "Qwen2.5.1 Coder 7B Instruct"}, {"bw": 960.0, "tps": 120.98, "params_b": 8.03, "accel": "NVIDIA RTX 6000 Ada Generation", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 616.0, "tps": 54.16, "params_b": 7.62, "accel": "NVIDIA GeForce RTX 2080 Ti", "model": "DeepSeek R1 Distill Qwen 7B"}, {"bw": 616.0, "tps": 35.89, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 2080 Ti", "model": "Qwen2.5 Coder 14B Instruct AWQ"}, {"bw": 616.0, "tps": 37.87, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 2080 Ti", "model": "Qwen2.5 Coder 14B Instruct AWQ"}, {"bw": 616.0, "tps": 68.03, "params_b": 7.62, "accel": "NVIDIA GeForce RTX 2080 Ti", "model": "Qwen2.5.1 Coder 7B Instruct"}, {"bw": 616.0, "tps": 53.18, "params_b": 9.4, "accel": "NVIDIA GeForce RTX 2080 Ti", "model": "codegeex4-all-9b"}, {"bw": 616.0, "tps": 68.22, "params_b": 7.62, "accel": "NVIDIA GeForce RTX 2080 Ti", "model": "Qwen2.5 Coder 7B Instruct GGUF"}, {"bw": 616.0, "tps": 57.89, "params_b": 7.62, "accel": "NVIDIA GeForce RTX 2080 Ti", "model": "Qwen2.5 Coder 7B Instruct"}, {"bw": 616.0, "tps": 2.19, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 2080 Ti", "model": "Qwen2.5 Coder 14B Instruct AWQ"}, {"bw": 960.0, "tps": 70.04, "params_b": 14.77, "accel": "NVIDIA RTX 6000 Ada Generation", "model": "Qwen2.5 14B Instruct"}, {"bw": 273.0, "tps": 16.76, "params_b": 14.77, "accel": "Apple M4 Pro 8P+4E+16GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 616.0, "tps": 202.15, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 2080 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 616.0, "tps": 37.32, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 2080 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 336.0, "tps": 24.91, "params_b": 4.55, "accel": "NVIDIA GeForce RTX 2060", "model": "Gemma 3 4b It"}, {"bw": 256.0, "tps": 95.6, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4060 Laptop GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 58.61, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3060 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 448.0, "tps": 152.81, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3060 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 936.0, "tps": 59.57, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen2.5 14B Instruct"}, {"bw": 936.0, "tps": 102.76, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3090", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 936.0, "tps": 340.12, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3090", "model": "Llama 3.2 1B Instruct"}, {"bw": 512.0, "tps": 12.34, "params_b": 1.5, "accel": "AMD Radeon RX 6900 XT", "model": "Llama 3.2 1B Instruct"}, {"bw": 512.0, "tps": 3.02, "params_b": 14.77, "accel": "AMD Radeon RX 6900 XT", "model": "Qwen2.5 14B Instruct"}, {"bw": 100.0, "tps": 59.86, "params_b": 1.5, "accel": "Apple M2 4P+4E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 3350.0, "tps": 96.62, "params_b": 1.5, "accel": "NVIDIA H100 80GB HBM3", "model": "Llama 3.2 1B Instruct"}, {"bw": 512.0, "tps": 12.13, "params_b": 1.5, "accel": "AMD Radeon RX 6800 XT", "model": "Llama 3.2 1B Instruct"}, {"bw": 432.0, "tps": 57.42, "params_b": 8.03, "accel": "NVIDIA RTX 3500 Ada Generation Laptop GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 432.0, "tps": 181.38, "params_b": 1.5, "accel": "NVIDIA RTX 3500 Ada Generation Laptop GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 432.0, "tps": 32.77, "params_b": 14.77, "accel": "NVIDIA RTX 3500 Ada Generation Laptop GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 1008.0, "tps": 84.19, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4090", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 273.0, "tps": 110.82, "params_b": 1.5, "accel": "Apple M4 Pro 8P+4E+16GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 256.0, "tps": 21.5, "params_b": 8.03, "accel": "AMD Radeon RX 6600 XT", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 800.0, "tps": 120.41, "params_b": 1.5, "accel": "Apple M2 Ultra 16P+8E", "model": "Llama 3.2 1B Instruct"}, {"bw": 1008.0, "tps": 22.32, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4090", "model": "Llama 3.2 1B Instruct"}, {"bw": 800.0, "tps": 176.39, "params_b": 1.5, "accel": "Apple M2 Ultra 16P+8E+60GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 512.0, "tps": 101.44, "params_b": 1.5, "accel": "AMD Radeon RX 6800", "model": "Llama 3.2 1B Instruct"}, {"bw": 800.0, "tps": 34.59, "params_b": 1.5, "accel": "Radeon RX 7900 XT", "model": "Llama 3.2 1B Instruct"}, {"bw": 624.0, "tps": 22.98, "params_b": 14.77, "accel": "AMD Radeon RX 7800 XT", "model": "Qwen2.5 14B Instruct"}, {"bw": 384.0, "tps": 27.33, "params_b": 8.03, "accel": "AMD Radeon RX 6700 XT", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 384.0, "tps": 92.45, "params_b": 1.5, "accel": "AMD Radeon RX 6700 XT", "model": "Llama 3.2 1B Instruct"}, {"bw": 112.0, "tps": 35.01, "params_b": 1.5, "accel": "NVIDIA GeForce GTX 1050", "model": "Llama 3.2 1B Instruct"}, {"bw": 640.0, "tps": 82.57, "params_b": 3.61, "accel": "AMD Radeon RX 9070 XT", "model": "Llama 3.2 3B Instruct"}, {"bw": 640.0, "tps": 165.13, "params_b": 1.5, "accel": "AMD Radeon RX 9070 XT", "model": "Llama 3.2 1B Instruct"}, {"bw": 192.0, "tps": 113.7, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4050 Laptop GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 400.0, "tps": 31.62, "params_b": 8.03, "accel": "Apple M1 Max 8P+2E+32GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 100.0, "tps": 67.75, "params_b": 1.5, "accel": "Apple M3 4P+4E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 120.0, "tps": 17.73, "params_b": 8.03, "accel": "Apple M4 4P+6E+10GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 120.0, "tps": 76.92, "params_b": 1.5, "accel": "Apple M4 4P+6E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 100.0, "tps": 10.77, "params_b": 8.03, "accel": "Apple M3 4P+4E+10GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 640.0, "tps": 159.87, "params_b": 1.5, "accel": "AMD Radeon RX 9070 XT", "model": "Llama 3.2 1B Instruct"}, {"bw": 640.0, "tps": 79.38, "params_b": 3.61, "accel": "AMD Radeon RX 9070 XT", "model": "Llama 3.2 3B Instruct"}, {"bw": 512.0, "tps": 24.49, "params_b": 14.77, "accel": "AMD Radeon RX 6900 XT", "model": "Qwen2.5 14B Instruct"}, {"bw": 150.0, "tps": 12.08, "params_b": 14.77, "accel": "Apple M3 Pro 6P+6E+18GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 150.0, "tps": 22.06, "params_b": 8.03, "accel": "Apple M3 Pro 6P+6E+18GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 546.0, "tps": 30.8, "params_b": 14.77, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 150.0, "tps": 89.6, "params_b": 1.5, "accel": "Apple M3 Pro 6P+6E+18GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 672.0, "tps": 42.94, "params_b": 14.77, "accel": "Quadro RTX 8000", "model": "Qwen2.5 14B Instruct"}, {"bw": 288.0, "tps": 26.24, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4060 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 672.0, "tps": 250.36, "params_b": 1.5, "accel": "Quadro RTX 8000", "model": "Llama 3.2 1B Instruct"}, {"bw": 672.0, "tps": 74.02, "params_b": 8.03, "accel": "Quadro RTX 8000", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 273.0, "tps": 17.96, "params_b": 14.77, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 273.0, "tps": 31.64, "params_b": 8.03, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 100.0, "tps": 6.3, "params_b": 14.77, "accel": "Apple M3 4P+4E+10GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 273.0, "tps": 119.04, "params_b": 1.5, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 672.0, "tps": 47.27, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4070 Ti SUPER", "model": "Qwen2.5 14B Instruct"}, {"bw": 672.0, "tps": 200.57, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4070 Ti SUPER", "model": "Llama 3.2 1B Instruct"}, {"bw": 672.0, "tps": 82.54, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4070 Ti SUPER", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 68.0, "tps": 40.89, "params_b": 1.5, "accel": "Apple M1 4P+4E+8GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 546.0, "tps": 54.78, "params_b": 8.03, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 546.0, "tps": 179.53, "params_b": 1.5, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 273.0, "tps": 30.18, "params_b": 8.03, "accel": "Apple M4 Pro 8P+4E+16GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 120.0, "tps": 76.6, "params_b": 1.5, "accel": "Apple M4 4P+6E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 960.0, "tps": 15.99, "params_b": 1.5, "accel": "AMD Radeon RX 7900 XTX", "model": "Llama 3.2 1B Instruct"}, {"bw": 1008.0, "tps": 102.67, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4090", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 1008.0, "tps": 282.28, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4090", "model": "Llama 3.2 1B Instruct"}, {"bw": 1008.0, "tps": 60.31, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4090", "model": "Qwen2.5 14B Instruct"}, {"bw": 256.0, "tps": 97.28, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4060 Laptop GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 1008.0, "tps": 16.87, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4090", "model": "Qwen2.5 14B Instruct"}, {"bw": 546.0, "tps": 23.11, "params_b": 14.77, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 1008.0, "tps": 66.1, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4090", "model": "Llama 3.2 1B Instruct"}, {"bw": 400.0, "tps": 46.41, "params_b": 8.03, "accel": "Apple M2 Max 8P+4E+38GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 717.0, "tps": 90.12, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4080", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 717.0, "tps": 52.15, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4080", "model": "Qwen2.5 14B Instruct"}, {"bw": 400.0, "tps": 25.23, "params_b": 14.77, "accel": "Apple M2 Max 8P+4E+38GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 288.0, "tps": 144.44, "params_b": 1.5, "accel": "NVIDIA RTX A2000 12GB", "model": "Llama 3.2 1B Instruct"}, {"bw": 360.0, "tps": 29.67, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3060", "model": "Qwen2.5 14B Instruct"}, {"bw": 504.0, "tps": 39.41, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4070 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 504.0, "tps": 69.06, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4070 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 504.0, "tps": 199.84, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4070 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 504.0, "tps": 188.69, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4070 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 200.0, "tps": 10.81, "params_b": 14.77, "accel": "Apple M1 Pro 6P+2E+14GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 100.0, "tps": 60.03, "params_b": 1.5, "accel": "Apple M2 4P+4E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 546.0, "tps": 26.6, "params_b": 14.77, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 546.0, "tps": 179.4, "params_b": 1.5, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 360.0, "tps": 25.99, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3060", "model": "Qwen2.5 14B Instruct"}, {"bw": 100.0, "tps": 14.86, "params_b": 8.03, "accel": "Apple M2 4P+4E+10GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 546.0, "tps": 30.76, "params_b": 14.77, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 273.0, "tps": 9.4, "params_b": 28.42, "accel": "Apple M4 Pro 8P+4E+16GPU", "model": "Gemma 3 27b It"}, {"bw": 512.0, "tps": 19.5, "params_b": 14.77, "accel": "AMD Radeon RX 6800", "model": "Qwen2.5 14B Instruct"}, {"bw": 960.0, "tps": 31.62, "params_b": 14.77, "accel": "AMD Radeon RX 7900 XTX", "model": "Qwen2.5 14B Instruct"}, {"bw": 960.0, "tps": 51.18, "params_b": 8.03, "accel": "AMD Radeon RX 7900 XTX", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 960.0, "tps": 151.55, "params_b": 1.5, "accel": "AMD Radeon RX 7900 XTX", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 36.41, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3070", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 960.0, "tps": 44.91, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 5080", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 273.0, "tps": 25.53, "params_b": 6.74, "accel": "Apple M4 Pro 8P+4E+16GPU", "model": "LLaMA v2"}, {"bw": 936.0, "tps": 113.96, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3090", "model": "Llama 3.2 1B Instruct"}, {"bw": 273.0, "tps": 24.41, "params_b": 6.74, "accel": "Apple M4 Pro 8P+4E+16GPU", "model": "meta-llama-7b"}, {"bw": 936.0, "tps": 114.57, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3090", "model": "Llama 3.2 1B Instruct"}, {"bw": 504.0, "tps": 38.8, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4070 SUPER", "model": "Qwen2.5 14B Instruct"}, {"bw": 504.0, "tps": 67.95, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4070 SUPER", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 504.0, "tps": 238.23, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4070 SUPER", "model": "Llama 3.2 1B Instruct"}, {"bw": 150.0, "tps": 19.11, "params_b": 8.03, "accel": "Apple M3 Pro 5P+6E+14GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 819.0, "tps": 34.44, "params_b": 14.77, "accel": "Apple M3 Ultra 20P+8E+60GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 512.0, "tps": 109.74, "params_b": 1.5, "accel": "AMD Radeon RX 6800 XT", "model": "Llama 3.2 1B Instruct"}, {"bw": 512.0, "tps": 35.52, "params_b": 8.03, "accel": "AMD Radeon RX 6800 XT", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 512.0, "tps": 20.53, "params_b": 14.77, "accel": "AMD Radeon RX 6800 XT", "model": "Qwen2.5 14B Instruct"}, {"bw": 360.0, "tps": 27.39, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3060", "model": "Qwen2.5 14B Instruct"}, {"bw": 360.0, "tps": 201.99, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3060", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 5.08, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3070", "model": "Qwen2.5 14B Instruct"}, {"bw": 200.0, "tps": 98.02, "params_b": 1.5, "accel": "Apple M2 Pro 8P+4E+19GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 60.49, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3070", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 200.0, "tps": 72.7, "params_b": 1.5, "accel": "Apple M1 Pro 6P+2E+14GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 217.85, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3070", "model": "Llama 3.2 1B Instruct"}, {"bw": 320.0, "tps": 191.17, "params_b": 1.5, "accel": "NVIDIA RTX 4000 SFF Ada Generation", "model": "Llama 3.2 1B Instruct"}, {"bw": 936.0, "tps": 328.12, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3090", "model": "Llama 3.2 1B Instruct"}, {"bw": 936.0, "tps": 59.77, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen2.5 14B Instruct"}, {"bw": 936.0, "tps": 59.73, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen2.5 14B Instruct"}, {"bw": 200.0, "tps": 19.79, "params_b": 8.03, "accel": "Apple M1 Pro 6P+2E+14GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 300.0, "tps": 187.66, "params_b": 1.5, "accel": "NVIDIA L4", "model": "Llama 3.2 1B Instruct"}, {"bw": 288.0, "tps": 49.13, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4060 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 288.0, "tps": 26.8, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4060 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 300.0, "tps": 23.54, "params_b": 14.77, "accel": "NVIDIA L4", "model": "Qwen2.5 14B Instruct"}, {"bw": 504.0, "tps": 38.83, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4070 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 504.0, "tps": 67.96, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4070 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 504.0, "tps": 199.01, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4070 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 288.0, "tps": 214.34, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4060 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 800.0, "tps": 48.84, "params_b": 8.03, "accel": "Apple M1 Ultra 16P+4E+48GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 800.0, "tps": 137.63, "params_b": 1.5, "accel": "Apple M1 Ultra 16P+4E+48GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 512.0, "tps": 17.72, "params_b": 8.03, "accel": "AMD Radeon RX 6900 XT", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 448.0, "tps": 64.67, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3070", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 448.0, "tps": 254.11, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3070", "model": "Llama 3.2 1B Instruct"}, {"bw": 512.0, "tps": 44.5, "params_b": 8.03, "accel": "AMD Radeon RX 6900 XT", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 68.0, "tps": 39.04, "params_b": 1.5, "accel": "Apple M1 4P+4E+8GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 512.0, "tps": 50.28, "params_b": 8.03, "accel": "AMD Radeon RX 6900 XT", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 448.0, "tps": 60.29, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3070", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 150.0, "tps": 12.18, "params_b": 14.77, "accel": "Apple M3 Pro 6P+6E+18GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 512.0, "tps": 38.32, "params_b": 8.03, "accel": "AMD Radeon RX 6900 XT", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 200.0, "tps": 19.97, "params_b": 8.03, "accel": "Apple M1 Pro 6P+2E+14GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 936.0, "tps": 112.71, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3090", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 100.0, "tps": 13.58, "params_b": 8.03, "accel": "Apple M3 4P+4E+10GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 936.0, "tps": 65.08, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen2.5 14B Instruct"}, {"bw": 200.0, "tps": 71.02, "params_b": 1.5, "accel": "Apple M1 Pro 6P+2E+14GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 100.0, "tps": 67.77, "params_b": 1.5, "accel": "Apple M3 4P+4E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 100.0, "tps": 67.92, "params_b": 1.5, "accel": "Apple M3 4P+4E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 200.0, "tps": 72.89, "params_b": 1.5, "accel": "Apple M1 Pro 6P+2E+14GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 173.17, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3060 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 54.37, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3060 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 448.0, "tps": 169.33, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3060 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 936.0, "tps": 368.27, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3090", "model": "Llama 3.2 1B Instruct"}, {"bw": 936.0, "tps": 63.41, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen2.5 14B Instruct"}, {"bw": 936.0, "tps": 60.54, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen2.5 14B Instruct"}, {"bw": 819.0, "tps": 35.88, "params_b": 14.77, "accel": "Apple M3 Ultra 24P+8E+80GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 448.0, "tps": 34.84, "params_b": 14.77, "accel": "NVIDIA RTX A4000", "model": "Qwen2.5 14B Instruct"}, {"bw": 150.0, "tps": 11.6, "params_b": 14.77, "accel": "Apple M3 Pro 6P+6E+18GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 1008.0, "tps": 68.1, "params_b": 4.55, "accel": "NVIDIA GeForce RTX 4090", "model": "Model Hf"}, {"bw": 800.0, "tps": 137.16, "params_b": 1.5, "accel": "Apple M1 Ultra 16P+4E+48GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 800.0, "tps": 48.73, "params_b": 8.03, "accel": "Apple M1 Ultra 16P+4E+48GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 100.0, "tps": 7.35, "params_b": 14.77, "accel": "Apple M2 4P+4E+10GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 800.0, "tps": 27.7, "params_b": 14.77, "accel": "Apple M1 Ultra 16P+4E+48GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 546.0, "tps": 28.27, "params_b": 14.77, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 546.0, "tps": 43.01, "params_b": 8.03, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 546.0, "tps": 186.4, "params_b": 1.5, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 936.0, "tps": 83.89, "params_b": 9.15, "accel": "NVIDIA GeForce RTX 3090", "model": "EuroLLM 9B Instruct"}, {"bw": 936.0, "tps": 203.84, "params_b": 1.78, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen2.5 1.5B Instruct"}, {"bw": 936.0, "tps": 50.17, "params_b": 7.62, "accel": "NVIDIA GeForce RTX 3090", "model": "AReaL Boba RL 7B"}, {"bw": 936.0, "tps": 205.35, "params_b": 1.78, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen2.5 1.5B Instruct"}, {"bw": 936.0, "tps": 131.85, "params_b": 3.82, "accel": "NVIDIA GeForce RTX 3090", "model": "Phi 3 Mini 128k Instruct"}, {"bw": 936.0, "tps": 206.8, "params_b": 1.78, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen2.5 1.5B Instruct"}, {"bw": 936.0, "tps": 131.03, "params_b": 3.82, "accel": "NVIDIA GeForce RTX 3090", "model": "Phi 3.5 Mini Instruct"}, {"bw": 936.0, "tps": 299.68, "params_b": 0.63, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen2.5 0.5B Instruct"}, {"bw": 936.0, "tps": 73.67, "params_b": 8.37, "accel": "NVIDIA GeForce RTX 3090", "model": "Granite 3.2 8b Instruct"}, {"bw": 273.0, "tps": 30.35, "params_b": 8.03, "accel": "Apple M4 Pro 8P+4E+16GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 192.0, "tps": 2.8, "params_b": 8.03, "accel": "NVIDIA GeForce GTX 1650 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 273.0, "tps": 110.79, "params_b": 1.5, "accel": "Apple M4 Pro 8P+4E+16GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 546.0, "tps": 190.03, "params_b": 1.5, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 192.0, "tps": 4.3, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3050 Ti Laptop GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 616.0, "tps": 184.51, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 2080 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 192.0, "tps": 20.71, "params_b": 1.5, "accel": "NVIDIA GeForce GTX 1650 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 273.0, "tps": 115.83, "params_b": 1.5, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 273.0, "tps": 31.82, "params_b": 8.03, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 273.0, "tps": 17.73, "params_b": 14.77, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 504.0, "tps": 35.0, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4070", "model": "Qwen2.5 14B Instruct"}, {"bw": 273.0, "tps": 17.78, "params_b": 14.77, "accel": "Apple M4 Pro 10P+4E+20GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 192.0, "tps": 122.45, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3050 Ti Laptop GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 192.0, "tps": 125.66, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3050 Ti Laptop GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 400.0, "tps": 124.48, "params_b": 1.5, "accel": "Apple M1 Max 8P+2E+32GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 760.0, "tps": 162.02, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3080", "model": "Llama 3.2 1B Instruct"}, {"bw": 760.0, "tps": 160.6, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3080", "model": "Llama 3.2 1B Instruct"}, {"bw": 760.0, "tps": 68.24, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3080", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 288.0, "tps": 30.67, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4060 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 288.0, "tps": 106.31, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4060 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 504.0, "tps": 25.7, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4070 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 504.0, "tps": 55.01, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4070 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 512.0, "tps": 27.08, "params_b": 12.25, "accel": "AMD Radeon RX 6900 XT", "model": "Mistral Nemo Instruct 2407"}, {"bw": 512.0, "tps": 25.35, "params_b": 12.25, "accel": "AMD Radeon RX 6900 XT", "model": "Mistral Nemo Instruct 2407"}, {"bw": 512.0, "tps": 23.79, "params_b": 12.25, "accel": "AMD Radeon RX 6900 XT", "model": "Mistral Nemo Instruct 2407"}, {"bw": 512.0, "tps": 20.72, "params_b": 12.25, "accel": "AMD Radeon RX 6900 XT", "model": "Mistral Nemo Instruct 2407"}, {"bw": 288.0, "tps": 52.47, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4060 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 288.0, "tps": 3.5, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4060 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 512.0, "tps": 36.72, "params_b": 8.03, "accel": "AMD Radeon RX 6900 XT", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 512.0, "tps": 34.88, "params_b": 8.03, "accel": "AMD Radeon RX 6900 XT", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 512.0, "tps": 33.14, "params_b": 8.03, "accel": "AMD Radeon RX 6900 XT", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 512.0, "tps": 29.48, "params_b": 8.03, "accel": "AMD Radeon RX 6900 XT", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 512.0, "tps": 55.34, "params_b": 3.61, "accel": "AMD Radeon RX 6900 XT", "model": "Llama 3.2 3B Instruct"}, {"bw": 288.0, "tps": 20.64, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4060 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 512.0, "tps": 53.59, "params_b": 3.61, "accel": "AMD Radeon RX 6900 XT", "model": "Llama 3.2 3B Instruct"}, {"bw": 512.0, "tps": 52.82, "params_b": 3.61, "accel": "AMD Radeon RX 6900 XT", "model": "Llama 3.2 3B Instruct"}, {"bw": 512.0, "tps": 49.13, "params_b": 3.61, "accel": "AMD Radeon RX 6900 XT", "model": "Llama 3.2 3B Instruct"}, {"bw": 288.0, "tps": 52.62, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4060 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 512.0, "tps": 29.13, "params_b": 3.61, "accel": "AMD Radeon RX 6900 XT", "model": "Llama 3.2 3B Instruct"}, {"bw": 512.0, "tps": 113.35, "params_b": 1.5, "accel": "AMD Radeon RX 6900 XT", "model": "Llama 3.2 1B Instruct"}, {"bw": 89.0, "tps": 6.41, "params_b": 3.61, "accel": "AMD Radeon 780M", "model": "Llama 3.2 3B Instruct"}, {"bw": 512.0, "tps": 110.3, "params_b": 1.5, "accel": "AMD Radeon RX 6900 XT", "model": "Llama 3.2 1B Instruct"}, {"bw": 512.0, "tps": 108.86, "params_b": 1.5, "accel": "AMD Radeon RX 6900 XT", "model": "Llama 3.2 1B Instruct"}, {"bw": 512.0, "tps": 102.57, "params_b": 1.5, "accel": "AMD Radeon RX 6900 XT", "model": "Llama 3.2 1B Instruct"}, {"bw": 512.0, "tps": 55.49, "params_b": 1.5, "accel": "AMD Radeon RX 6900 XT", "model": "Llama 3.2 1B Instruct"}, {"bw": 800.0, "tps": 7.99, "params_b": 14.77, "accel": "Apple M1 Ultra 16P+4E", "model": "Qwen2.5 14B Instruct"}, {"bw": 504.0, "tps": 24.94, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4070 SUPER", "model": "Qwen2.5 14B Instruct"}, {"bw": 504.0, "tps": 45.47, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4070 SUPER", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 504.0, "tps": 138.48, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4070 SUPER", "model": "Llama 3.2 1B Instruct"}, {"bw": 89.0, "tps": 11.66, "params_b": 1.5, "accel": "AMD Radeon 780M", "model": "Llama 3.2 1B Instruct"}, {"bw": 800.0, "tps": 15.25, "params_b": 8.03, "accel": "Apple M1 Ultra 16P+4E", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 800.0, "tps": 49.12, "params_b": 8.03, "accel": "Apple M1 Ultra 16P+4E+48GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 800.0, "tps": 27.85, "params_b": 14.77, "accel": "Apple M1 Ultra 16P+4E+48GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 800.0, "tps": 57.07, "params_b": 1.5, "accel": "Apple M1 Ultra 16P+4E", "model": "Llama 3.2 1B Instruct"}, {"bw": 800.0, "tps": 139.12, "params_b": 1.5, "accel": "Apple M1 Ultra 16P+4E+48GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 800.0, "tps": 169.76, "params_b": 1.5, "accel": "Apple M2 Ultra 16P+8E+60GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 150.0, "tps": 11.95, "params_b": 14.77, "accel": "Apple M3 Pro 5P+6E+14GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 100.0, "tps": 24.64, "params_b": 1.5, "accel": "Apple M2 4P+4E+8GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 52.78, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3070", "model": "DeepSeek R1 Distill Llama 8B"}, {"bw": 448.0, "tps": 52.83, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3070", "model": "DeepSeek R1 Distill Llama 8B"}, {"bw": 1792.0, "tps": 24.29, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 5090", "model": "Qwen2.5 14B Instruct"}, {"bw": 448.0, "tps": 7.48, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3060 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 448.0, "tps": 67.34, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3060 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 68.0, "tps": 4.79, "params_b": 14.77, "accel": "Apple M1 4P+4E+7GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 448.0, "tps": 181.94, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3060 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 68.0, "tps": 38.41, "params_b": 1.5, "accel": "Apple M1 4P+4E+7GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 68.0, "tps": 9.09, "params_b": 8.03, "accel": "Apple M1 4P+4E+7GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 672.0, "tps": 55.73, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4070 Ti SUPER", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 672.0, "tps": 135.65, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4070 Ti SUPER", "model": "Llama 3.2 1B Instruct"}, {"bw": 100.0, "tps": 14.67, "params_b": 8.03, "accel": "Apple M2 4P+4E+10GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 1008.0, "tps": 62.4, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3090 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 864.0, "tps": 13.63, "params_b": 14.77, "accel": "NVIDIA L40S", "model": "Qwen2.5 14B Instruct"}, {"bw": 1008.0, "tps": 108.72, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3090 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 1008.0, "tps": 343.47, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3090 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 936.0, "tps": 300.38, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3090", "model": "Llama 3.2 1B Instruct"}, {"bw": 200.0, "tps": 68.57, "params_b": 1.5, "accel": "Apple M1 Pro 8P+2E+16GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 546.0, "tps": 51.94, "params_b": 7.62, "accel": "Apple M4 Max 10P+4E+32GPU", "model": "DeepSeek R1 Distill Qwen 7B"}, {"bw": 546.0, "tps": 27.75, "params_b": 14.77, "accel": "Apple M4 Max 10P+4E+32GPU", "model": "DeepSeek R1 Distill Qwen 14B"}, {"bw": 546.0, "tps": 36.34, "params_b": 10.16, "accel": "Apple M4 Max 10P+4E+32GPU", "model": "gemma-2-9b-it"}, {"bw": 546.0, "tps": 48.11, "params_b": 7.24, "accel": "Apple M4 Max 10P+4E+32GPU", "model": "C:\\LLM_MODELS\\mistralai"}, {"bw": 546.0, "tps": 133.12, "params_b": 1.78, "accel": "Apple M4 Max 10P+4E+32GPU", "model": "DeepSeek R1 Distill Qwen 1.5B"}, {"bw": 672.0, "tps": 20.82, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 5070", "model": "Qwen2.5 14B Instruct"}, {"bw": 1792.0, "tps": 65.29, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 5090", "model": "Qwen2.5 14B Instruct"}, {"bw": 546.0, "tps": 21.32, "params_b": 23.57, "accel": "Apple M4 Max 10P+4E+32GPU", "model": "Mistral Small 24B Instruct 2501"}, {"bw": 1792.0, "tps": 65.12, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 5090", "model": "Qwen2.5 14B Instruct"}, {"bw": 896.0, "tps": 237.67, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5070 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 672.0, "tps": 36.99, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 5070", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 1792.0, "tps": 64.99, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 5090", "model": "Qwen2.5 14B Instruct"}, {"bw": 896.0, "tps": 47.68, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 5070 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 1792.0, "tps": 106.39, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 5090", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 896.0, "tps": 79.79, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 5070 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 672.0, "tps": 100.65, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5070", "model": "Llama 3.2 1B Instruct"}, {"bw": 1792.0, "tps": 275.61, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5090", "model": "Llama 3.2 1B Instruct"}, {"bw": 546.0, "tps": 169.14, "params_b": 1.5, "accel": "Apple M4 Max 10P+4E+32GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 896.0, "tps": 237.91, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5070 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 546.0, "tps": 47.17, "params_b": 8.03, "accel": "Apple M4 Max 10P+4E+32GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 1792.0, "tps": 274.5, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5090", "model": "Llama 3.2 1B Instruct"}, {"bw": 546.0, "tps": 26.42, "params_b": 14.77, "accel": "Apple M4 Max 10P+4E+32GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 896.0, "tps": 240.3, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5070 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 896.0, "tps": 240.08, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5070 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 320.0, "tps": 24.86, "params_b": 14.77, "accel": "NVIDIA RTX 4000 SFF Ada Generation", "model": "Qwen2.5 14B Instruct"}, {"bw": 320.0, "tps": 44.63, "params_b": 8.03, "accel": "NVIDIA RTX 4000 SFF Ada Generation", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 320.0, "tps": 187.57, "params_b": 1.5, "accel": "NVIDIA RTX 4000 SFF Ada Generation", "model": "Llama 3.2 1B Instruct"}, {"bw": 1792.0, "tps": 43.18, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 5090", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 1792.0, "tps": 101.96, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 5090", "model": "Llama 3.2 1B Instruct"}, {"bw": 768.0, "tps": 272.52, "params_b": 1.5, "accel": "NVIDIA RTX A5000", "model": "Llama 3.2 1B Instruct"}, {"bw": 504.0, "tps": 42.62, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4070", "model": "Qwen2.5 14B Instruct"}, {"bw": 504.0, "tps": 76.35, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4070", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 288.0, "tps": 27.05, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4060 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 672.0, "tps": 53.57, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4070 Ti SUPER", "model": "Qwen2.5 14B Instruct"}, {"bw": 1008.0, "tps": 70.78, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4090", "model": "Qwen2.5 14B Instruct"}, {"bw": 272.0, "tps": 45.83, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4060", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 504.0, "tps": 282.97, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4070", "model": "Llama 3.2 1B Instruct"}, {"bw": 1008.0, "tps": 122.45, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4090", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 760.0, "tps": 52.04, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3080", "model": "Qwen2.5 14B Instruct"}, {"bw": 672.0, "tps": 94.5, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4070 Ti SUPER", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 288.0, "tps": 49.6, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4060 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 504.0, "tps": 77.19, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4070 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 1008.0, "tps": 395.7, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4090", "model": "Llama 3.2 1B Instruct"}, {"bw": 912.0, "tps": 59.89, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3080 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 760.0, "tps": 92.55, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3080", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 504.0, "tps": 308.79, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4070 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 272.0, "tps": 190.44, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4060", "model": "Llama 3.2 1B Instruct"}, {"bw": 672.0, "tps": 342.15, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4070 Ti SUPER", "model": "Llama 3.2 1B Instruct"}, {"bw": 288.0, "tps": 217.52, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4060 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 912.0, "tps": 103.81, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3080 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 1008.0, "tps": 64.63, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3090 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 760.0, "tps": 315.22, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3080", "model": "Llama 3.2 1B Instruct"}, {"bw": 912.0, "tps": 331.77, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3080 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 360.0, "tps": 29.14, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3060", "model": "Qwen2.5 14B Instruct"}, {"bw": 1008.0, "tps": 110.84, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3090 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 448.0, "tps": 63.44, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3070", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 1008.0, "tps": 346.04, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3090 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 234.73, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3070", "model": "Llama 3.2 1B Instruct"}, {"bw": 608.0, "tps": 83.17, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3070 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 448.0, "tps": 61.71, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3060 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 360.0, "tps": 53.07, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3060", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 608.0, "tps": 297.33, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3070 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 244.13, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3060 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 912.0, "tps": 321.18, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3080 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 360.0, "tps": 214.26, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3060", "model": "Llama 3.2 1B Instruct"}, {"bw": 616.0, "tps": 42.16, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 2080 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 448.0, "tps": 51.33, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 2060 SUPER", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 616.0, "tps": 73.96, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 2080 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 736.0, "tps": 29.92, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4080 SUPER", "model": "Qwen2.5 14B Instruct"}, {"bw": 100.0, "tps": 20.53, "params_b": 14.77, "accel": "Tesla P100-SXM2-16GB", "model": "Qwen2.5 14B Instruct"}, {"bw": 616.0, "tps": 242.28, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 2080 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 195.15, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 2060 SUPER", "model": "Llama 3.2 1B Instruct"}, {"bw": 320.0, "tps": 46.88, "params_b": 1.5, "accel": "NVIDIA GeForce GTX 1080", "model": "Llama 3.2 1B Instruct"}, {"bw": 100.0, "tps": 33.46, "params_b": 8.03, "accel": "Tesla P100-SXM2-16GB", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 448.0, "tps": 57.78, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 2070 SUPER", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 736.0, "tps": 54.44, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4080 SUPER", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 484.0, "tps": 143.76, "params_b": 1.5, "accel": "NVIDIA GeForce GTX 1080 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 736.0, "tps": 140.66, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4080 SUPER", "model": "Llama 3.2 1B Instruct"}, {"bw": 448.0, "tps": 212.16, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 2070 SUPER", "model": "Llama 3.2 1B Instruct"}, {"bw": 100.0, "tps": 124.25, "params_b": 1.5, "accel": "Tesla P100-SXM2-16GB", "model": "Llama 3.2 1B Instruct"}, {"bw": 3350.0, "tps": 23.41, "params_b": 14.77, "accel": "NVIDIA H100 80GB HBM3", "model": "Qwen2.5 14B Instruct"}, {"bw": 256.0, "tps": 97.01, "params_b": 1.5, "accel": "NVIDIA GeForce GTX 1070", "model": "Llama 3.2 1B Instruct"}, {"bw": 256.0, "tps": 116.42, "params_b": 1.5, "accel": "NVIDIA GeForce GTX 1070 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 960.0, "tps": 28.1, "params_b": 14.77, "accel": "NVIDIA RTX 6000 Ada Generation", "model": "Qwen2.5 14B Instruct"}, {"bw": 864.0, "tps": 24.93, "params_b": 14.77, "accel": "NVIDIA L40S", "model": "Qwen2.5 14B Instruct"}, {"bw": 300.0, "tps": 24.72, "params_b": 14.77, "accel": "NVIDIA L40", "model": "Qwen2.5 14B Instruct"}, {"bw": 3350.0, "tps": 42.41, "params_b": 8.03, "accel": "NVIDIA H100 80GB HBM3", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 1008.0, "tps": 31.87, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4090", "model": "Qwen2.5 14B Instruct"}, {"bw": 960.0, "tps": 51.29, "params_b": 8.03, "accel": "NVIDIA RTX 6000 Ada Generation", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 3350.0, "tps": 20.46, "params_b": 14.77, "accel": "NVIDIA H100 NVL", "model": "Qwen2.5 14B Instruct"}, {"bw": 864.0, "tps": 45.91, "params_b": 8.03, "accel": "NVIDIA L40S", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 3350.0, "tps": 95.76, "params_b": 1.5, "accel": "NVIDIA H100 80GB HBM3", "model": "Llama 3.2 1B Instruct"}, {"bw": 960.0, "tps": 130.9, "params_b": 1.5, "accel": "NVIDIA RTX 6000 Ada Generation", "model": "Llama 3.2 1B Instruct"}, {"bw": 300.0, "tps": 45.15, "params_b": 8.03, "accel": "NVIDIA L40", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 864.0, "tps": 120.98, "params_b": 1.5, "accel": "NVIDIA L40S", "model": "Llama 3.2 1B Instruct"}, {"bw": 360.0, "tps": 31.37, "params_b": 14.77, "accel": "NVIDIA RTX 4000 Ada Generation", "model": "Qwen2.5 14B Instruct"}, {"bw": 300.0, "tps": 118.02, "params_b": 1.5, "accel": "NVIDIA L40", "model": "Llama 3.2 1B Instruct"}, {"bw": 3350.0, "tps": 37.06, "params_b": 8.03, "accel": "NVIDIA H100 NVL", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 1008.0, "tps": 58.32, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4090", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 360.0, "tps": 56.51, "params_b": 8.03, "accel": "NVIDIA RTX 4000 Ada Generation", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 3350.0, "tps": 83.91, "params_b": 1.5, "accel": "NVIDIA H100 NVL", "model": "Llama 3.2 1B Instruct"}, {"bw": 1008.0, "tps": 143.56, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4090", "model": "Llama 3.2 1B Instruct"}, {"bw": 360.0, "tps": 236.88, "params_b": 1.5, "accel": "NVIDIA RTX 4000 Ada Generation", "model": "Llama 3.2 1B Instruct"}, {"bw": 300.0, "tps": 23.64, "params_b": 14.77, "accel": "NVIDIA L4", "model": "Qwen2.5 14B Instruct"}, {"bw": 224.0, "tps": 21.03, "params_b": 14.77, "accel": "NVIDIA RTX 2000 Ada Generation", "model": "Qwen2.5 14B Instruct"}, {"bw": 2039.0, "tps": 20.82, "params_b": 14.77, "accel": "NVIDIA A100 80GB PCIe", "model": "Qwen2.5 14B Instruct"}, {"bw": 3350.0, "tps": 72.17, "params_b": 14.77, "accel": "NVIDIA H100 PCIe", "model": "Qwen2.5 14B Instruct"}, {"bw": 3350.0, "tps": 119.97, "params_b": 8.03, "accel": "NVIDIA H100 PCIe", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 300.0, "tps": 42.87, "params_b": 8.03, "accel": "NVIDIA L4", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 3350.0, "tps": 335.43, "params_b": 1.5, "accel": "NVIDIA H100 PCIe", "model": "Llama 3.2 1B Instruct"}, {"bw": 224.0, "tps": 37.93, "params_b": 8.03, "accel": "NVIDIA RTX 2000 Ada Generation", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 2039.0, "tps": 37.1, "params_b": 8.03, "accel": "NVIDIA A100 80GB PCIe", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 2039.0, "tps": 66.34, "params_b": 14.77, "accel": "NVIDIA A100-SXM4-80GB", "model": "Qwen2.5 14B Instruct"}, {"bw": 2039.0, "tps": 110.1, "params_b": 8.03, "accel": "NVIDIA A100-SXM4-80GB", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 2039.0, "tps": 81.87, "params_b": 1.5, "accel": "NVIDIA A100 80GB PCIe", "model": "Llama 3.2 1B Instruct"}, {"bw": 300.0, "tps": 190.04, "params_b": 1.5, "accel": "NVIDIA L4", "model": "Llama 3.2 1B Instruct"}, {"bw": 224.0, "tps": 172.92, "params_b": 1.5, "accel": "NVIDIA RTX 2000 Ada Generation", "model": "Llama 3.2 1B Instruct"}, {"bw": 2039.0, "tps": 308.11, "params_b": 1.5, "accel": "NVIDIA A100-SXM4-80GB", "model": "Llama 3.2 1B Instruct"}, {"bw": 640.0, "tps": 43.81, "params_b": 14.77, "accel": "NVIDIA RTX A4500", "model": "Qwen2.5 14B Instruct"}, {"bw": 768.0, "tps": 50.85, "params_b": 14.77, "accel": "NVIDIA RTX A5000", "model": "Qwen2.5 14B Instruct"}, {"bw": 768.0, "tps": 51.97, "params_b": 14.77, "accel": "NVIDIA RTX A6000", "model": "Qwen2.5 14B Instruct"}, {"bw": 640.0, "tps": 77.61, "params_b": 8.03, "accel": "NVIDIA RTX A4500", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 768.0, "tps": 89.86, "params_b": 8.03, "accel": "NVIDIA RTX A5000", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 768.0, "tps": 313.24, "params_b": 1.5, "accel": "NVIDIA RTX A5000", "model": "Llama 3.2 1B Instruct"}, {"bw": 768.0, "tps": 90.48, "params_b": 8.03, "accel": "NVIDIA RTX A6000", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 640.0, "tps": 284.74, "params_b": 1.5, "accel": "NVIDIA RTX A4500", "model": "Llama 3.2 1B Instruct"}, {"bw": 768.0, "tps": 315.3, "params_b": 1.5, "accel": "NVIDIA RTX A6000", "model": "Llama 3.2 1B Instruct"}, {"bw": 360.0, "tps": 51.99, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3060", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 448.0, "tps": 34.13, "params_b": 14.77, "accel": "NVIDIA RTX A4000", "model": "Qwen2.5 14B Instruct"}, {"bw": 360.0, "tps": 29.2, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3060", "model": "Qwen2.5 14B Instruct"}, {"bw": 448.0, "tps": 60.59, "params_b": 8.03, "accel": "NVIDIA RTX A4000", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 448.0, "tps": 241.48, "params_b": 1.5, "accel": "NVIDIA RTX A4000", "model": "Llama 3.2 1B Instruct"}, {"bw": 360.0, "tps": 206.33, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3060", "model": "Llama 3.2 1B Instruct"}, {"bw": 280.0, "tps": 60.74, "params_b": 1.3, "accel": "AMD Radeon RX 6650 XT", "model": "Gemma 3 1b It"}, {"bw": 120.0, "tps": 76.42, "params_b": 1.5, "accel": "Apple M4 4P+6E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 280.0, "tps": 82.21, "params_b": 1.5, "accel": "AMD Radeon RX 6650 XT", "model": "Llama 3.2 1B Instruct"}, {"bw": 280.0, "tps": 84.04, "params_b": 1.5, "accel": "AMD Radeon RX 6650 XT", "model": "Llama 3.2 1B Instruct"}, {"bw": 120.0, "tps": 9.59, "params_b": 14.77, "accel": "Apple M4 4P+6E+10GPU", "model": "Qwen2.5 14B Instruct"}, {"bw": 936.0, "tps": 111.51, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3090", "model": "Llama 3.2 1B Instruct"}, {"bw": 936.0, "tps": 105.48, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3090", "model": "Llama 3.2 1B Instruct"}, {"bw": 120.0, "tps": 67.43, "params_b": 1.5, "accel": "Apple M4 4P+6E", "model": "Llama 3.2 1B Instruct"}, {"bw": 120.0, "tps": 17.61, "params_b": 8.03, "accel": "Apple M4 4P+6E+10GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 120.0, "tps": 76.61, "params_b": 1.5, "accel": "Apple M4 4P+6E+10GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 936.0, "tps": 198.7, "params_b": 1.3, "accel": "NVIDIA GeForce RTX 3090", "model": "Gemma 3 1b It"}, {"bw": 256.0, "tps": 116.56, "params_b": 1.5, "accel": "NVIDIA GeForce GTX 1070 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 768.0, "tps": 38.76, "params_b": 23.57, "accel": "NVIDIA RTX A5000", "model": "Mistral Small 24B Instruct 2501"}, {"bw": 768.0, "tps": 63.06, "params_b": 12.25, "accel": "NVIDIA RTX A5000", "model": "Mistral Nemo Instruct 2407"}, {"bw": 768.0, "tps": 86.58, "params_b": 8.02, "accel": "NVIDIA RTX A5000", "model": "Ministral 8B Instruct 2410 TEST"}, {"bw": 768.0, "tps": 310.05, "params_b": 1.5, "accel": "NVIDIA RTX A5000", "model": "Llama 3.2 1B Instruct"}, {"bw": 256.0, "tps": 114.33, "params_b": 1.5, "accel": "NVIDIA GeForce GTX 1070 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 288.0, "tps": 216.24, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4060 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 936.0, "tps": 307.56, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3090", "model": "Llama 3.2 1B Instruct"}, {"bw": 280.0, "tps": 82.33, "params_b": 1.5, "accel": "AMD Radeon RX 6650 XT", "model": "Llama 3.2 1B Instruct"}, {"bw": 280.0, "tps": 26.81, "params_b": 7.62, "accel": "AMD Radeon RX 6650 XT", "model": "Qwen2.5.1 Coder 7B Instruct"}, {"bw": 280.0, "tps": 82.95, "params_b": 1.5, "accel": "AMD Radeon RX 6650 XT", "model": "Llama 3.2 1B Instruct"}, {"bw": 280.0, "tps": 83.87, "params_b": 1.5, "accel": "AMD Radeon RX 6650 XT", "model": "Llama 3.2 1B Instruct"}, {"bw": 68.0, "tps": 40.72, "params_b": 1.5, "accel": "Apple M1 4P+4E+8GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 200.0, "tps": 79.99, "params_b": 1.5, "accel": "Apple M1 Pro 8P+2E+16GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 717.0, "tps": 129.62, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4080", "model": "Llama 3.2 1B Instruct"}, {"bw": 288.0, "tps": 74.42, "params_b": 4.55, "accel": "NVIDIA GeForce RTX 4060 Ti", "model": "Gemma 3 4b It"}, {"bw": 280.0, "tps": 25.93, "params_b": 8.03, "accel": "AMD Radeon RX 6650 XT", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 280.0, "tps": 109.04, "params_b": 1.5, "accel": "AMD Radeon RX 6650 XT", "model": "Llama 3.2 1B Instruct"}, {"bw": 546.0, "tps": 51.57, "params_b": 8.03, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 288.0, "tps": 26.9, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 4060 Ti", "model": "Qwen2.5 14B Instruct"}, {"bw": 288.0, "tps": 49.26, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 4060 Ti", "model": "Meta Llama 3.1 8B Instruct"}, {"bw": 546.0, "tps": 156.3, "params_b": 1.5, "accel": "Apple M4 Max 12P+4E", "model": "Llama 3.2 1B Instruct"}, {"bw": 288.0, "tps": 217.97, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 4060 Ti", "model": "Llama 3.2 1B Instruct"}, {"bw": 546.0, "tps": 184.22, "params_b": 1.5, "accel": "Apple M4 Max 12P+4E+40GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 200.0, "tps": 80.45, "params_b": 1.5, "accel": "Apple M1 Pro 8P+2E+16GPU", "model": "Llama 3.2 1B Instruct"}, {"bw": 936.0, "tps": 330.01, "params_b": 1.5, "accel": "NVIDIA GeForce RTX 3090", "model": "Llama 3.2 1B Instruct"}, {"bw": 936.0, "tps": 59.94, "params_b": 14.77, "accel": "NVIDIA GeForce RTX 3090", "model": "Qwen2.5 14B Instruct"}, {"bw": 936.0, "tps": 103.1, "params_b": 8.03, "accel": "NVIDIA GeForce RTX 3090", "model": "Meta Llama 3.1 8B Instruct"}]}
|