File size: 765 Bytes
c32c359
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
"""Tiny vLLM — a minimal continuous-batching engine.

Educational reimplementation of the core vLLM/SGLang ideas:
paged KV cache, prefix caching, continuous batching with chunked prefill,
and SSE streaming over a thin HTTP layer.
"""

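# Only the lightweight config/request types are imported eagerly. LLMEngine is
# re-exported lazily via the module-level __getattr__ below: importing this
# package should not pull in torch, so the lightweight block_manager/scheduler
# can be unit-tested without it.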

from .config import EngineConfig, SamplingParams
from .request import Request, Sequence, SequenceStatus

# Public names of the package. "LLMEngine" is listed here but is not bound at
# import time; it is supplied on first access by the module-level __getattr__
# defined later in this file.
__all__ = [
    "EngineConfig",  # from .config
    "SamplingParams",  # from .config
    "LLMEngine",  # resolved lazily from .engine
    "Request",  # from .request
    "Sequence",  # from .request
    "SequenceStatus",  # from .request
]


def __getattr__(name: str):
    """Resolve lazily re-exported package attributes (PEP 562 module hook).

    Python invokes this function only when ordinary attribute lookup on the
    module fails. ``LLMEngine`` is imported from ``.engine`` at that point
    instead of at package import time.

    Args:
        name: The attribute being looked up on the package.

    Returns:
        The ``LLMEngine`` class when ``name`` is ``"LLMEngine"``.

    Raises:
        AttributeError: For any other name, with the same message format the
            interpreter uses for a missing module attribute.
    """
    if name == "LLMEngine":
        # Deferred import: keeps `.engine` out of the package's import-time
        # dependencies.
        from .engine import LLMEngine
        return LLMEngine
    # Match the interpreter's standard wording so tracebacks identify the
    # module as well as the missing attribute.
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")