"""Tiny vLLM — a minimal continuous-batching engine. Educational reimplementation of the core vLLM/SGLang ideas: paged KV cache, prefix caching, continuous batching with chunked prefill, and SSE streaming over a thin HTTP layer. """ # Lazy re-exports: importing this package should not pull in torch, so the # lightweight block_manager/scheduler can be unit-tested without it. from .config import EngineConfig, SamplingParams from .request import Request, Sequence, SequenceStatus __all__ = [ "EngineConfig", "SamplingParams", "LLMEngine", "Request", "Sequence", "SequenceStatus", ] def __getattr__(name: str): if name == "LLMEngine": from .engine import LLMEngine return LLMEngine raise AttributeError(name)