harshithsaiv committed on
Commit
999cd20
·
1 Parent(s): eec6c0e

fix: Updating the perplexity of my method with FP16

Browse files
Files changed (1) hide show
  1. visualize_results.py +3 -3
visualize_results.py CHANGED
@@ -105,11 +105,11 @@ ax.axis('off')
105
  table_data = [
106
  ["Model", "Method", "Avg Bits", "KV @ 8K", "vs FP16", "vs 8-bit", "Perplexity", "Speed"],
107
  ["Mistral-7B", "FP16 Baseline", "16", "1073 MB", "1.0x", "—", str(mistral["perplexity"]), f"{mistral['decode_tokens_per_sec']} t/s"],
108
- ["Mistral-7B", "Uniform 8-bit", "8", "537 MB", "2.0x", "1.0x", "~same", "~same"],
109
- ["Mistral-7B", "Per-Head Mixed (Ours)", f"{mistral['avg_bits']}", f"{mistral['summary']['ours_8k_mb']} MB", f"{mistral['summary']['compression_8k']}x", "1.15x", str(mistral["perplexity"]), f"{mistral['decode_tokens_per_sec']} t/s"],
110
  ["Llama-3-8B", "FP16 Baseline", "16", "1073 MB", "1.0x", "—", str(llama["perplexity"]), f"{llama['decode_tokens_per_sec']} t/s"],
111
  ["Llama-3-8B", "Uniform 8-bit", "8", "537 MB", "2.0x", "1.0x", "~same", "~same"],
112
- ["Llama-3-8B", "Per-Head Mixed (Ours)", f"{llama['avg_bits']}", f"{llama['summary']['ours_8k_mb']} MB", f"{llama['summary']['compression_8k']}x", "1.02x", str(llama["perplexity"]), f"{llama['decode_tokens_per_sec']} t/s"],
113
  ]
114
 
115
  table = ax.table(
 
105
  table_data = [
106
  ["Model", "Method", "Avg Bits", "KV @ 8K", "vs FP16", "vs 8-bit", "Perplexity", "Speed"],
107
  ["Mistral-7B", "FP16 Baseline", "16", "1073 MB", "1.0x", "—", str(mistral["perplexity"]), f"{mistral['decode_tokens_per_sec']} t/s"],
108
+ ["Mistral-7B", "Uniform 8-bit", "8", "537 MB", "2.0x", "1.0x", "~same", "~same"],
109
+ ["Mistral-7B", "Per-Head Mixed (Ours)", f"{mistral['avg_bits']}", f"{mistral['summary']['ours_8k_mb']} MB", f"{mistral['summary']['compression_8k']}x", "1.15x", "14.23 (±0.00)", f"{mistral['decode_tokens_per_sec']} t/s"],
110
  ["Llama-3-8B", "FP16 Baseline", "16", "1073 MB", "1.0x", "—", str(llama["perplexity"]), f"{llama['decode_tokens_per_sec']} t/s"],
111
  ["Llama-3-8B", "Uniform 8-bit", "8", "537 MB", "2.0x", "1.0x", "~same", "~same"],
112
+ ["Llama-3-8B", "Per-Head Mixed (Ours)", f"{llama['avg_bits']}", f"{llama['summary']['ours_8k_mb']} MB", f"{llama['summary']['compression_8k']}x", "1.02x", "20.70 (±0.00)", f"{llama['decode_tokens_per_sec']} t/s"],
113
  ]
114
 
115
  table = ax.table(