ruv committed on
Commit
a353bb7
·
verified ·
1 Parent(s): c861fb5

Add default TurboQuant KV-cache profile (ADR-129)

Browse files
Files changed (1) hide show
  1. default.turboquant.json +57 -0
default.turboquant.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": 1,
3
+ "model": "ruv/ruvltra-medium",
4
+ "default_bits": "3.5",
5
+ "default_eviction": "h2o",
6
+ "use_qjl": true,
7
+ "per_layer_config": {
8
+ "layer_0": {
9
+ "bits": "4.0",
10
+ "reason": "boundary layer \u2014 higher precision for input/output"
11
+ },
12
+ "layer_1": {
13
+ "bits": "4.0",
14
+ "reason": "boundary layer \u2014 higher precision for input/output"
15
+ },
16
+ "layer_2": {
17
+ "bits": "3.5",
18
+ "reason": "early layer \u2014 moderate compression"
19
+ },
20
+ "layer_3": {
21
+ "bits": "3.5",
22
+ "reason": "early layer \u2014 moderate compression"
23
+ },
24
+ "layer_4": {
25
+ "bits": "3.5",
26
+ "reason": "early layer \u2014 moderate compression"
27
+ },
28
+ "layer_5": {
29
+ "bits": "3.5",
30
+ "reason": "early layer \u2014 moderate compression"
31
+ },
32
+ "layer_6": {
33
+ "bits": "3.5",
34
+ "reason": "early layer \u2014 moderate compression"
35
+ },
36
+ "layer_7": {
37
+ "bits": "3.5",
38
+ "reason": "early layer \u2014 moderate compression"
39
+ },
40
+ "layer_8": {
41
+ "bits": "3.5",
42
+ "reason": "early layer \u2014 moderate compression"
43
+ },
44
+ "layer_9": {
45
+ "bits": "3.5",
46
+ "reason": "early layer \u2014 moderate compression"
47
+ },
48
+ "layer_40": {
49
+ "bits": "4.0",
50
+ "reason": "boundary layer \u2014 higher precision for input/output"
51
+ },
52
+ "layer_41": {
53
+ "bits": "4.0",
54
+ "reason": "boundary layer \u2014 higher precision for input/output"
55
+ }
56
+ }
57
+ }