| """Tiny vLLM — a minimal continuous-batching engine. | |
| Educational reimplementation of the core vLLM/SGLang ideas: | |
| paged KV cache, prefix caching, continuous batching with chunked prefill, | |
| and SSE streaming over a thin HTTP layer. | |
| """ | |
# Lazy re-export: LLMEngine is resolved on first access (see __getattr__ below)
# so that importing this package does not pull in torch, and the lightweight
# block_manager/scheduler can be unit-tested without it.
| from .config import EngineConfig, SamplingParams | |
| from .request import Request, Sequence, SequenceStatus | |
# Public names of the package, i.e. what ``from <package> import *`` exposes.
__all__ = [
    # Imported eagerly from .config
    "EngineConfig",
    "SamplingParams",
    # Not bound at import time; supplied on demand by the module __getattr__
    "LLMEngine",
    # Imported eagerly from .request
    "Request",
    "Sequence",
    "SequenceStatus",
]
def __getattr__(name: str):
    """Resolve lazily exported package attributes (PEP 562 module hook).

    Python calls this only when normal lookup of ``name`` in the module
    namespace fails, so eagerly imported names never reach it.

    Args:
        name: The attribute being looked up on the package.

    Returns:
        The ``LLMEngine`` class when ``name == "LLMEngine"``.

    Raises:
        AttributeError: For any other name, using the same message shape as
            the interpreter's default so tracebacks identify the module.
    """
    if name == "LLMEngine":
        # Deferred so that the engine module is only imported when a caller
        # actually asks for LLMEngine, not on plain package import.
        from .engine import LLMEngine

        return LLMEngine
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")