Spaces:
Paused
Paused
File size: 1,550 Bytes
# 751ad26
from __future__ import annotations
from dataclasses import dataclass
@dataclass(frozen=True, slots=True)
class ExecutionProfile:
sink_window_tokens: int
recent_window_tokens: int
relevance_top_k: int
relevance_mode: str = "envelope"
relevance_sketch_size: int = 1
exact_refine_top_k: int = 0
approximate_old_pages: bool = False
def resolve_execution_profile(
    name: str | None,
    *,
    context_length: int,
) -> ExecutionProfile | None:
    """Map a profile name to its ``ExecutionProfile`` preset.

    Args:
        name: Preset name. ``None`` and ``"none"`` both mean "no profile".
            Recognized presets are ``"m4_envelope_fast"``,
            ``"m4_envelope_balanced"`` and ``"m4_envelope_auto"``.
        context_length: Only consulted by ``"m4_envelope_auto"``, which
            picks its settings from the ``<= 4_096``, ``<= 8_192`` and
            larger tiers. The fixed presets ignore it.

    Returns:
        The matching ``ExecutionProfile``, or ``None`` when no profile was
        requested.

    Raises:
        ValueError: If ``name`` is not one of the recognized values.
    """
    if name is None or name == "none":
        return None

    # Every preset shares the same sink window; only the recent window and
    # the relevance top-k differ, so pick those two and build once below.
    if name == "m4_envelope_fast":
        recent_tokens, top_k = 1_024, 2
    elif name == "m4_envelope_balanced":
        recent_tokens, top_k = 1_024, 4
    elif name == "m4_envelope_auto":
        # Tiered by context length: larger contexts get a wider recent window.
        if context_length <= 4_096:
            recent_tokens, top_k = 1_024, 4
        elif context_length <= 8_192:
            recent_tokens, top_k = 2_048, 4
        else:
            recent_tokens, top_k = 4_096, 8
    else:
        raise ValueError(f"unknown execution profile: {name}")

    return ExecutionProfile(
        sink_window_tokens=256,
        recent_window_tokens=recent_tokens,
        relevance_top_k=top_k,
    )
|