leideng/QCFuse / srt /compilation /compilation_counter.py
leideng's picture
download
raw
1.62 kB
# Adapted from https://github.com/vllm-project/vllm/blob/v0.10.0/vllm/compilation/compilation_counter.py
import copy
import dataclasses
from contextlib import contextmanager
@dataclasses.dataclass
class CompilationCounter:
    """Integer tallies of compilation-related events.

    Every field defaults to zero. The class itself only stores the
    tallies and provides helpers to snapshot them (``clone``) and to
    verify how much they changed across a block of code (``expect``);
    nothing in this class increments the fields.
    """

    num_models_seen: int = 0
    num_graphs_seen: int = 0
    # Piecewise graphs, including the splitting ops.
    num_piecewise_graphs_seen: int = 0
    # Piecewise graphs, not including the splitting ops.
    num_piecewise_capturable_graphs_seen: int = 0
    num_backend_compilations: int = 0
    # Number of gpu_model_runner attempts to trigger CUDAGraphs capture.
    num_gpu_runner_capture_triggers: int = 0
    # Number of CUDAGraphs captured.
    num_cudagraph_captured: int = 0
    # InductorAdapter.compile calls.
    num_inductor_compiles: int = 0
    # EagerAdapter.compile calls.
    num_eager_compiles: int = 0
    # The number of times the compiler cache entry was updated.
    num_cache_entries_updated: int = 0
    # The number of standalone_compile compiled artifacts saved.
    num_compiled_artifacts_saved: int = 0
    # Number of times a model was loaded with CompilationLevel.DYNAMO_AS_IS.
    dynamo_as_is_count: int = 0

    def clone(self) -> "CompilationCounter":
        """Return an independent deep copy of this counter."""
        return copy.deepcopy(self)

    @contextmanager
    def expect(self, **kwargs):
        """Check that counters change by exact amounts across a ``with`` body.

        A snapshot is taken on entry; when the body finishes normally,
        each named counter's current value is compared with the snapshot.
        If the body raises, that exception propagates and no check runs.

        Args:
            **kwargs: Maps a counter field name to the expected difference
                (value after the body minus value before it).

        Raises:
            AssertionError: If a named counter changed by a different
                amount than expected. Only the first mismatch, in keyword
                order, is reported.
            AttributeError: If a name is not an attribute of this counter
                (raised when the body exits, during the comparison).
        """
        old = self.clone()
        yield
        for k, v in kwargs.items():
            after = getattr(self, k)
            before = getattr(old, k)
            # Raise explicitly rather than using ``assert``: assert
            # statements are removed under ``python -O``, which would turn
            # this whole check into a silent no-op.
            if after - before != v:
                raise AssertionError(
                    f"{k} not as expected, before it is {before}"
                    f", after it is {after}, "
                    f"expected diff is {v}"
                )
# Module-level CompilationCounter instance, created at import time with
# every counter at its default of zero.
compilation_counter = CompilationCounter()

Xet Storage Details

Size:
1.62 kB
·
Xet hash:
c13ed98679ff3661cb1d246ba1d59a3285c36391b6252a1f1b65e3123032c8a5

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.