harshithsaiv committed on
Commit
c7426b8
·
1 Parent(s): 999cd20

feat: Llama results for long context benchmark

Browse files
results/llama-3-8b/long_context_results.json ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": "llama-3-8b",
3
+ "results": [
4
+ {
5
+ "context_len": 512,
6
+ "peak_memory_gb": 16.27,
7
+ "fp16_mb": 67.11,
8
+ "uniform8_mb": 33.55,
9
+ "mixed_precision_mb": 32.9,
10
+ "compression_vs_fp16": 2.04,
11
+ "compression_vs_8bit": 1.02,
12
+ "prefill_ms": 50.3
13
+ },
14
+ {
15
+ "context_len": 1024,
16
+ "peak_memory_gb": 16.47,
17
+ "fp16_mb": 134.22,
18
+ "uniform8_mb": 67.11,
19
+ "mixed_precision_mb": 65.8,
20
+ "compression_vs_fp16": 2.04,
21
+ "compression_vs_8bit": 1.02,
22
+ "prefill_ms": 89.1
23
+ },
24
+ {
25
+ "context_len": 2048,
26
+ "peak_memory_gb": 16.88,
27
+ "fp16_mb": 268.44,
28
+ "uniform8_mb": 134.22,
29
+ "mixed_precision_mb": 131.6,
30
+ "compression_vs_fp16": 2.04,
31
+ "compression_vs_8bit": 1.02,
32
+ "prefill_ms": 172.4
33
+ },
34
+ {
35
+ "context_len": 4096,
36
+ "peak_memory_gb": 17.69,
37
+ "fp16_mb": 536.87,
38
+ "uniform8_mb": 268.44,
39
+ "mixed_precision_mb": 263.2,
40
+ "compression_vs_fp16": 2.04,
41
+ "compression_vs_8bit": 1.02,
42
+ "prefill_ms": 349.8
43
+ },
44
+ {
45
+ "context_len": 8192,
46
+ "peak_memory_gb": 19.31,
47
+ "fp16_mb": 1073.74,
48
+ "uniform8_mb": 536.87,
49
+ "mixed_precision_mb": 526.39,
50
+ "compression_vs_fp16": 2.04,
51
+ "compression_vs_8bit": 1.02,
52
+ "prefill_ms": 735.4
53
+ },
54
+ {
55
+ "context_len": 16384,
56
+ "peak_memory_gb": 22.55,
57
+ "fp16_mb": 2147.48,
58
+ "uniform8_mb": 1073.74,
59
+ "mixed_precision_mb": 1052.77,
60
+ "compression_vs_fp16": 2.04,
61
+ "compression_vs_8bit": 1.02,
62
+ "prefill_ms": 1628.0
63
+ },
64
+ {
65
+ "context_len": 32768,
66
+ "peak_memory_gb": "OOM",
67
+ "fp16_mb": 4294.967296,
68
+ "note": "FP16 OOM, compressed might fit"
69
+ }
70
+ ]
71
+ }