ruv committed on
Commit
8cf4175
·
verified ·
1 Parent(s): 1452d87

Add default TurboQuant KV-cache profile (ADR-129)

Browse files
Files changed (1) hide show
  1. default.turboquant.json +41 -0
default.turboquant.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": 1,
3
+ "model": "ruv/ruvltra-claude-code",
4
+ "default_bits": "3.5",
5
+ "default_eviction": "h2o",
6
+ "use_qjl": true,
7
+ "per_layer_config": {
8
+ "layer_0": {
9
+ "bits": "4.0",
10
+ "reason": "boundary layer \u2014 higher precision for input/output"
11
+ },
12
+ "layer_1": {
13
+ "bits": "4.0",
14
+ "reason": "boundary layer \u2014 higher precision for input/output"
15
+ },
16
+ "layer_2": {
17
+ "bits": "3.5",
18
+ "reason": "early layer \u2014 moderate compression"
19
+ },
20
+ "layer_3": {
21
+ "bits": "3.5",
22
+ "reason": "early layer \u2014 moderate compression"
23
+ },
24
+ "layer_4": {
25
+ "bits": "3.5",
26
+ "reason": "early layer \u2014 moderate compression"
27
+ },
28
+ "layer_5": {
29
+ "bits": "3.5",
30
+ "reason": "early layer \u2014 moderate compression"
31
+ },
32
+ "layer_22": {
33
+ "bits": "4.0",
34
+ "reason": "boundary layer \u2014 higher precision for input/output"
35
+ },
36
+ "layer_23": {
37
+ "bits": "4.0",
38
+ "reason": "boundary layer \u2014 higher precision for input/output"
39
+ }
40
+ }
41
+ }