Spaces:

enCoder
/

tiny-vllm

Running

minimal continuous-batching LLM engine

c32c359 11 days ago

765 Bytes

	"""Tiny vLLM — a minimal continuous-batching engine.

	Educational reimplementation of the core vLLM/SGLang ideas:
	paged KV cache, prefix caching, continuous batching with chunked prefill,
	and SSE streaming over a thin HTTP layer.
	"""

	# LLMEngine is re-exported lazily (see the module-level __getattr__ below):
	# importing this package should not pull in torch, so the lightweight
	# block_manager/scheduler can be unit-tested without it.

	from .config import EngineConfig, SamplingParams
	from .request import Request, Sequence, SequenceStatus

	# Public API of the package. Every name except "LLMEngine" is bound by the
	# eager imports above; "LLMEngine" is resolved on attribute access by the
	# module-level __getattr__ defined below.
	__all__ = [
	"EngineConfig",
	"SamplingParams",
	"LLMEngine",  # lazy: not imported when the package itself is imported
	"Request",
	"Sequence",
	"SequenceStatus",
	]


	def __getattr__(name: str):
	    """Resolve lazily exported package attributes (PEP 562).

	    Python calls this hook only when normal attribute lookup on the module
	    fails. ``LLMEngine`` is imported from ``.engine`` on first access instead
	    of at package import time, then stored in the module namespace so later
	    lookups no longer go through this hook.

	    Args:
	        name: The attribute requested on the package.

	    Returns:
	        The ``LLMEngine`` object exported by ``.engine`` when ``name`` is
	        ``"LLMEngine"``.

	    Raises:
	        AttributeError: For any other name, with the conventional
	            "module ... has no attribute ..." message.
	    """
	    if name == "LLMEngine":
	        from .engine import LLMEngine

	        # Cache the resolved object so subsequent accesses are plain
	        # attribute lookups rather than repeated hook calls.
	        globals()[name] = LLMEngine
	        return LLMEngine
	    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")