""" ENGRAM Protocol — Test Fixtures Shared pytest fixtures for all test modules. Provides synthetic KV cache tensors at correct shapes, temp directories, and model specs. """ from __future__ import annotations from pathlib import Path import pytest import torch from kvcos.core.cache_spec import GEMMA_4_26B_A4B, LLAMA_3_1_8B, PHI_3_MINI from kvcos.core.types import AttentionType, CacheSection, ModelCacheSpec @pytest.fixture def llama_spec() -> ModelCacheSpec: """Llama 3.1 8B model spec.""" return LLAMA_3_1_8B @pytest.fixture def phi3_spec() -> ModelCacheSpec: """Phi-3-Mini model spec.""" return PHI_3_MINI @pytest.fixture def gemma4_spec() -> ModelCacheSpec: """Gemma 4 26B-A4B ISWA model spec.""" return GEMMA_4_26B_A4B @pytest.fixture def tmp_data_dir(tmp_path: Path) -> Path: """Temporary data directory for storage tests.""" data_dir = tmp_path / "engram_data" data_dir.mkdir() return data_dir @pytest.fixture def tmp_index_dir(tmp_path: Path) -> Path: """Temporary directory for FAISS index persistence tests.""" index_dir = tmp_path / "engram_index" index_dir.mkdir() return index_dir def make_synthetic_kv( spec: ModelCacheSpec, ctx_len: int = 256, seed: int = 42, ) -> tuple[torch.Tensor, torch.Tensor]: """Create synthetic KV cache tensors with correct shapes. Returns (keys, values) each [n_layers, n_kv_heads, ctx_len, head_dim]. Values are random but reproducible via seed. """ torch.manual_seed(seed) shape = (spec["n_layers"], spec["n_kv_heads"], ctx_len, spec["head_dim"]) keys = torch.randn(shape, dtype=torch.float16) values = torch.randn(shape, dtype=torch.float16) return keys, values @pytest.fixture def llama_kv_256(llama_spec: ModelCacheSpec) -> tuple[torch.Tensor, torch.Tensor]: """Synthetic Llama 3.1 8B KV cache, 256 tokens. Shape: [32, 8, 256, 128] for both keys and values. """ return make_synthetic_kv(llama_spec, ctx_len=256) @pytest.fixture def llama_kv_1024(llama_spec: ModelCacheSpec) -> tuple[torch.Tensor, torch.Tensor]: """Synthetic Llama 3.1 8B KV cache, 1024 tokens.""" return make_synthetic_kv(llama_spec, ctx_len=1024, seed=123) @pytest.fixture def phi3_kv_256(phi3_spec: ModelCacheSpec) -> tuple[torch.Tensor, torch.Tensor]: """Synthetic Phi-3-Mini KV cache, 256 tokens. Shape: [32, 32, 256, 96] for both keys and values. """ return make_synthetic_kv(phi3_spec, ctx_len=256, seed=99) # ── ISWA Fixtures ──────────────────────────────────────────────────────────── def make_synthetic_iswa_blob( sections: tuple[CacheSection, ...], n_cells: int = 4, arch: str = "gemma4", v_trans: bool = True, seed: int = 42, ) -> bytes: """Build a synthetic ISWA blob with multiple KV cache sections. Matches llama.cpp state blob format for ISWA models: 1. Architecture string header 2. n_stream = len(sections) 3. Per stream: cell metadata + K/V data per layer Args: sections: Cache sections (e.g., global + SWA for Gemma 4). n_cells: Number of KV cells per section. arch: Architecture string in blob header. v_trans: Whether V tensors are stored transposed. seed: Random seed for reproducible data. """ import struct import numpy as np from kvcos.core.blob_parser import GGML_TYPE_F16 rng = np.random.RandomState(seed) parts: list[bytes] = [] # 1. Architecture string header parts.append(struct.pack(" bytes: """Synthetic Gemma 4 ISWA blob with 2 sections, 4 cells.""" return make_synthetic_iswa_blob(GEMMA4_SECTIONS, n_cells=4) @pytest.fixture def gemma4_iswa_blob_8cells() -> bytes: """Synthetic Gemma 4 ISWA blob with 2 sections, 8 cells.""" return make_synthetic_iswa_blob(GEMMA4_SECTIONS, n_cells=8, seed=99)