Commit ·
999cd20
1
Parent(s): eec6c0e
fix: Updating the perplexity of my method with FP16
Browse files
- visualize_results.py +3 -3
visualize_results.py
CHANGED
|
@@ -105,11 +105,11 @@ ax.axis('off')
|
|
| 105 |
table_data = [
|
| 106 |
["Model", "Method", "Avg Bits", "KV @ 8K", "vs FP16", "vs 8-bit", "Perplexity", "Speed"],
|
| 107 |
["Mistral-7B", "FP16 Baseline", "16", "1073 MB", "1.0x", "—", str(mistral["perplexity"]), f"{mistral['decode_tokens_per_sec']} t/s"],
|
| 108 |
-
["Mistral-7B", "Uniform 8-bit", "8", "537 MB", "2.0x", "1.0x", "~same",
|
| 109 |
-
["Mistral-7B", "Per-Head Mixed (Ours)", f"{mistral['avg_bits']}", f"{mistral['summary']['ours_8k_mb']} MB", f"{mistral['summary']['compression_8k']}x", "1.15x",
|
| 110 |
["Llama-3-8B", "FP16 Baseline", "16", "1073 MB", "1.0x", "—", str(llama["perplexity"]), f"{llama['decode_tokens_per_sec']} t/s"],
|
| 111 |
["Llama-3-8B", "Uniform 8-bit", "8", "537 MB", "2.0x", "1.0x", "~same", "~same"],
|
| 112 |
-
["Llama-3-8B", "Per-Head Mixed (Ours)", f"{llama['avg_bits']}", f"{llama['summary']['ours_8k_mb']} MB", f"{llama['summary']['compression_8k']}x", "1.02x",
|
| 113 |
]
|
| 114 |
|
| 115 |
table = ax.table(
|
|
|
|
| 105 |
table_data = [
|
| 106 |
["Model", "Method", "Avg Bits", "KV @ 8K", "vs FP16", "vs 8-bit", "Perplexity", "Speed"],
|
| 107 |
["Mistral-7B", "FP16 Baseline", "16", "1073 MB", "1.0x", "—", str(mistral["perplexity"]), f"{mistral['decode_tokens_per_sec']} t/s"],
|
| 108 |
+
["Mistral-7B", "Uniform 8-bit", "8", "537 MB", "2.0x", "1.0x", "~same", "~same"],
|
| 109 |
+
["Mistral-7B", "Per-Head Mixed (Ours)", f"{mistral['avg_bits']}", f"{mistral['summary']['ours_8k_mb']} MB", f"{mistral['summary']['compression_8k']}x", "1.15x", "14.23 (±0.00)", f"{mistral['decode_tokens_per_sec']} t/s"],
|
| 110 |
["Llama-3-8B", "FP16 Baseline", "16", "1073 MB", "1.0x", "—", str(llama["perplexity"]), f"{llama['decode_tokens_per_sec']} t/s"],
|
| 111 |
["Llama-3-8B", "Uniform 8-bit", "8", "537 MB", "2.0x", "1.0x", "~same", "~same"],
|
| 112 |
+
["Llama-3-8B", "Per-Head Mixed (Ours)", f"{llama['avg_bits']}", f"{llama['summary']['ours_8k_mb']} MB", f"{llama['summary']['compression_8k']}x", "1.02x", "20.70 (±0.00)", f"{llama['decode_tokens_per_sec']} t/s"],
|
| 113 |
]
|
| 114 |
|
| 115 |
table = ax.table(
|