harshithsaiv commited on
Commit
0f6e4c1
·
1 Parent(s): cfe6f8e

impl: Benchmark results of Llama

Browse files
results/llama-3-8b/benchmark_results.json ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": "llama-3-8b",
3
+ "avg_bits": 7.84,
4
+ "compression": [
5
+ {
6
+ "context_len": 512,
7
+ "fp16_mb": 67.11,
8
+ "uniform8_mb": 33.55,
9
+ "mixed_precision_mb": 32.9,
10
+ "compression_vs_fp16": 2.04,
11
+ "compression_vs_8bit": 1.02
12
+ },
13
+ {
14
+ "context_len": 1024,
15
+ "fp16_mb": 134.22,
16
+ "uniform8_mb": 67.11,
17
+ "mixed_precision_mb": 65.8,
18
+ "compression_vs_fp16": 2.04,
19
+ "compression_vs_8bit": 1.02
20
+ },
21
+ {
22
+ "context_len": 2048,
23
+ "fp16_mb": 268.44,
24
+ "uniform8_mb": 134.22,
25
+ "mixed_precision_mb": 131.6,
26
+ "compression_vs_fp16": 2.04,
27
+ "compression_vs_8bit": 1.02
28
+ },
29
+ {
30
+ "context_len": 4096,
31
+ "fp16_mb": 536.87,
32
+ "uniform8_mb": 268.44,
33
+ "mixed_precision_mb": 263.2,
34
+ "compression_vs_fp16": 2.04,
35
+ "compression_vs_8bit": 1.02
36
+ },
37
+ {
38
+ "context_len": 8192,
39
+ "fp16_mb": 1073.74,
40
+ "uniform8_mb": 536.87,
41
+ "mixed_precision_mb": 526.39,
42
+ "compression_vs_fp16": 2.04,
43
+ "compression_vs_8bit": 1.02
44
+ }
45
+ ],
46
+ "memory": [
47
+ {
48
+ "context": 1024,
49
+ "peak_memory_gb": 16.47
50
+ },
51
+ {
52
+ "context": 4096,
53
+ "peak_memory_gb": 17.69
54
+ },
55
+ {
56
+ "context": 8192,
57
+ "peak_memory_gb": 19.31
58
+ }
59
+ ],
60
+ "decode_tokens_per_sec": 36.7,
61
+ "perplexity": 20.7,
62
+ "summary": {
63
+ "fp16_8k_mb": 1073.74,
64
+ "ours_8k_mb": 526.39,
65
+ "compression_8k": 2.04
66
+ }
67
+ }