File size: 1,550 Bytes
751ad26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
from __future__ import annotations

from dataclasses import dataclass


@dataclass(frozen=True, slots=True)
class ExecutionProfile:
    """Immutable bundle of tuning parameters describing one execution profile.

    Instances are frozen (attributes cannot be reassigned after construction)
    and use ``__slots__``. The three window / top-k fields are required; the
    remaining fields carry defaults.

    NOTE(review): the precise meaning of each field is defined by whatever
    code consumes this profile, which is not visible here. The per-field
    comments below are inferred from the names and should be confirmed.
    """

    # Presumably the size, in tokens, of the leading "sink" window — confirm.
    sink_window_tokens: int
    # Presumably the size, in tokens, of the most-recent window — confirm.
    recent_window_tokens: int
    # Presumably how many top-ranked items relevance selection keeps — confirm.
    relevance_top_k: int
    # Name of the relevance strategy; defaults to "envelope".
    relevance_mode: str = "envelope"
    # Defaults to 1. NOTE(review): unit/meaning of "sketch size" not shown here.
    relevance_sketch_size: int = 1
    # Defaults to 0. NOTE(review): 0 presumably disables exact refinement — confirm.
    exact_refine_top_k: int = 0
    # Defaults to False. NOTE(review): presumably toggles approximation of older pages — confirm.
    approximate_old_pages: bool = False


def resolve_execution_profile(name: str | None, *, context_length: int) -> ExecutionProfile | None:
    """Translate a profile name into its ``ExecutionProfile`` preset.

    Args:
        name: Profile identifier. ``None`` and ``"none"`` both mean "no
            profile". Recognised presets are ``"m4_envelope_fast"``,
            ``"m4_envelope_balanced"`` and ``"m4_envelope_auto"``.
        context_length: Only consulted for ``"m4_envelope_auto"``, where it
            selects one of three tiers (``<= 4096``, ``<= 8192``, larger).

    Returns:
        The matching ``ExecutionProfile``, or ``None`` when no profile was
        requested.

    Raises:
        ValueError: If ``name`` is not one of the recognised identifiers.
    """
    if name is None or name == "none":
        return None

    def build(recent_window: int, top_k: int) -> ExecutionProfile:
        # Every preset shares the same 256-token sink window; only the
        # recent window and the relevance top-k vary between them.
        return ExecutionProfile(
            sink_window_tokens=256,
            recent_window_tokens=recent_window,
            relevance_top_k=top_k,
        )

    if name == "m4_envelope_fast":
        return build(1_024, 2)
    if name == "m4_envelope_balanced":
        return build(1_024, 4)
    if name == "m4_envelope_auto":
        # (inclusive context-length ceiling, recent window, top-k), scanned
        # in ascending order; the first ceiling that fits wins.
        bounded_tiers = (
            (4_096, 1_024, 4),
            (8_192, 2_048, 4),
        )
        for ceiling, recent_window, top_k in bounded_tiers:
            if context_length <= ceiling:
                return build(recent_window, top_k)
        # Anything above the largest ceiling gets the widest settings.
        return build(4_096, 8)

    raise ValueError(f"unknown execution profile: {name}")