sage / serve /kv_cache.py

feat: rewrite SAGE 1B architecture and replace legacy repo contents

ef18673 verified about 1 month ago

629 Bytes

	"""KV-cache helpers for inference."""

	from __future__ import annotations

	from dataclasses import dataclass

	import torch


	@dataclass
	class KVCache:
	    """Per-layer key/value tensor store used during inference.

	    ``entries`` holds one slot per layer, addressed by layer index. A slot
	    is ``None`` until ``append`` fills it (``empty`` creates every slot as
	    ``None``), so readers must be prepared to see unfilled slots.
	    """

	    # One slot per layer: a ``(key, value)`` pair, or ``None`` while unfilled.
	    # The annotation admits ``None`` because ``empty`` creates exactly that.
	    entries: list[tuple[torch.Tensor, torch.Tensor] | None]

	    @classmethod
	    def empty(cls, num_layers: int) -> "KVCache":
	        """Create a cache with ``num_layers`` unfilled (``None``) slots."""
	        return cls(entries=[None] * num_layers)

	    def append(self, layer_index: int, key: torch.Tensor, value: torch.Tensor) -> None:
	        """Store one layer's key/value pair, replacing any previous pair.

	        Despite the name, this assigns the slot at ``layer_index``; it does
	        not concatenate with tensors that were stored there earlier.

	        Raises:
	            IndexError: If ``layer_index`` is outside the range of ``entries``.
	        """
	        self.entries[layer_index] = (key, value)