| # Adapted from https://github.com/vllm-project/vllm/blob/v0.10.0/vllm/compilation/compilation_counter.py | |
| import copy | |
| import dataclasses | |
| from contextlib import contextmanager | |
@dataclasses.dataclass
class CompilationCounter:
    """Integer counters for compilation- and CUDA-graph-related events.

    Every field starts at zero. This class only stores the values and
    offers helpers to snapshot them (`clone`) and to assert how much they
    changed across a block of code (`expect`); the code that increments
    the counters lives outside this class.
    """

    num_models_seen: int = 0
    num_graphs_seen: int = 0
    # including the splitting ops
    num_piecewise_graphs_seen: int = 0
    # not including the splitting ops
    num_piecewise_capturable_graphs_seen: int = 0
    num_backend_compilations: int = 0
    # Number of gpu_model_runner attempts to trigger CUDAGraphs capture
    num_gpu_runner_capture_triggers: int = 0
    # Number of CUDAGraphs captured
    num_cudagraph_captured: int = 0
    # InductorAdapter.compile calls
    num_inductor_compiles: int = 0
    # EagerAdapter.compile calls
    num_eager_compiles: int = 0
    # The number of times vLLM's compiler cache entry was updated
    num_cache_entries_updated: int = 0
    # The number of standalone_compile compiled artifacts saved
    num_compiled_artifacts_saved: int = 0
    # Number of times a model was loaded with CompilationLevel.DYNAMO_AS_IS
    dynamo_as_is_count: int = 0

    def clone(self) -> "CompilationCounter":
        """Return an independent deep copy of the current counter values."""
        return copy.deepcopy(self)

    @contextmanager
    def expect(self, **kwargs):
        """Assert that counters change by exact amounts inside a `with` block.

        Args:
            **kwargs: Maps a counter field name to the expected difference
                between its value when the block exits and its value when
                the block was entered.

        Raises:
            AssertionError: If any named counter changed by a different
                amount than expected.
            AttributeError: If a keyword does not name an attribute of
                this counter (raised when the block exits).
        """
        # Snapshot before running the body so the deltas can be computed.
        old = self.clone()
        yield
        # Only reached when the body finished without raising.
        for k, v in kwargs.items():
            assert getattr(self, k) - getattr(old, k) == v, (
                f"{k} not as expected, before it is {getattr(old, k)}"
                f", after it is {getattr(self, k)}, "
                f"expected diff is {v}"
            )


# Module-level counter instance, created once at import time.
compilation_counter = CompilationCounter()
Xet Storage Details
- Size:
- 1.62 kB
- Xet hash:
- c13ed98679ff3661cb1d246ba1d59a3285c36391b6252a1f1b65e3123032c8a5
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.