Spaces:
Runtime error
Runtime error
| #!/usr/bin/env python | |
| # pylint: disable=cell-var-from-loop | |
| """ | |
| Test Torch Dynamo functionality and backends | |
| """ | |
| import json | |
| import warnings | |
| import numpy as np | |
| import torch | |
| from torchvision.models import resnet18 | |
print('torch:', torch.__version__)

try:
    # must be imported explicitly or namespace is not found
    import torch._dynamo as dynamo # pylint: disable=ungrouped-imports
except Exception as err: # pylint: disable=broad-except
    # best-effort: report the problem and keep `dynamo` bound so later code can test for it
    dynamo = None
    print('torch without dynamo support', err)

N_ITERS = 20 # number of timed iterations per configuration

if dynamo is not None:
    # only touch dynamo config when the import above actually succeeded
    dynamo.config.verbose = True

warnings.filterwarnings('ignore', category=UserWarning) # silenced for now as many backends report tons of them
# torch.set_float32_matmul_precision('high') # enable to test in fp32
def timed(fn):
    """Run `fn()` once and measure how long the call took.

    When CUDA is available the measurement uses a pair of CUDA events and
    synchronizes before reading the elapsed time. Without CUDA the call is
    timed with a wall-clock counter instead, so the helper also works on
    CPU-only machines.

    Args:
        fn: zero-argument callable to execute.

    Returns:
        Tuple `(result, elapsed_ms)`: the value returned by `fn()` and the
        elapsed time in milliseconds.
    """
    if not torch.cuda.is_available():
        # function-local import so the module namespace is left untouched
        from time import perf_counter
        begin = perf_counter()
        result = fn()
        return result, (perf_counter() - begin) * 1000.0
    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)
    start.record()
    result = fn()
    end.record()
    torch.cuda.synchronize() # both events must have completed before elapsed_time can be read
    return result, start.elapsed_time(end)
def generate_data(b, device=None):
    """Create one random batch of inputs and integer labels.

    Args:
        b: batch size.
        device: target device for the tensors. Defaults to CUDA when it is
            available and to the CPU otherwise.

    Returns:
        Tuple `(images, labels)`: a float32 tensor of shape `(b, 3, 128, 128)`
        and a tensor of `b` random integers drawn from `[0, 1000)`.
    """
    if device is None:
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
    # tensors are created on the CPU first and then moved, exactly as before
    images = torch.randn(b, 3, 128, 128).to(torch.float32).to(device)
    labels = torch.randint(1000, (b,)).to(device)
    return images, labels
def init_model(device=None):
    """Build the ResNet-18 model used for the benchmark.

    Args:
        device: target device for the model. Defaults to CUDA when it is
            available and to the CPU otherwise.

    Returns:
        A `resnet18()` instance (default constructor arguments) converted to
        float32 and moved to `device`.
    """
    if device is None:
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
    return resnet18().to(torch.float32).to(device)
def evaluate(mod, val):
    """Call `mod` on `val` and return whatever it produces.

    Kept as a standalone function so the same callable can be run as-is or
    handed to a dynamo backend for optimization.
    """
    output = mod(val)
    return output
def _median_time(fn, model, batch_size, n_iters):
    """Return the median time in ms of `n_iters` timed calls of `fn(model, batch)`.

    A fresh input batch is generated for every call.
    """
    samples = []
    for _ in range(n_iters):
        batch = generate_data(batch_size)[0]
        # bind the batch as a default so the closure never depends on the loop variable
        _res, elapsed = timed(lambda batch=batch: fn(model, batch))
        samples.append(elapsed)
    return float(np.median(samples))


def main():
    """Benchmark eager evaluation against every available dynamo backend.

    Prints the time of the first (warm-up) call for each configuration and
    finally a JSON object mapping `'default'` and each backend name to its
    median time in ms, or to `'error'` when the backend failed.
    """
    batch_size = 16
    model = init_model()
    warmup = generate_data(batch_size)[0]
    results = {}

    # eager baseline
    print('eager initial eval:', timed(lambda: evaluate(model, warmup))[1])
    results['default'] = _median_time(evaluate, model, batch_size, N_ITERS)

    # tolerate a torch build where the dynamo namespace could not be imported
    dynamo_mod = getattr(torch, '_dynamo', None)
    backends = dynamo_mod.list_backends() if dynamo_mod is not None else []
    print('dynamo available backends:', backends)
    for backend in backends:
        try:
            # required before changing backends
            dynamo_mod.reset()
            eval_dyn = dynamo_mod.optimize(backend)(evaluate)
            # first pass, dynamo is going to be slower as it compiles
            print('dynamo initial eval:', backend, timed(lambda fn=eval_dyn: fn(model, warmup))[1])
            results[backend] = _median_time(eval_dyn, model, batch_size, N_ITERS)
        except Exception as err: # pylint: disable=broad-except
            # a failing backend must not abort the run; print just the first error line as backtraces can be quite long
            first_line = str(err).split('\n')[0]
            print('dynamo backend failed:', backend, first_line)
            results[backend] = 'error'

    # print stats
    print(json.dumps(results, indent=4))


if __name__ == '__main__':
    main()
| """ | |
| Reference: <https://github.com/pytorch/pytorch/blob/4f4b62e4a255708e928445b6502139d5962974fa/docs/source/dynamo/get-started.rst> | |
| Training & Inference backends: | |
| dynamo.optimize("inductor") - Uses TorchInductor backend with AotAutograd and cudagraphs by leveraging codegened Triton kernels | |
| dynamo.optimize("aot_nvfuser") - nvFuser with AotAutograd | |
| dynamo.optimize("aot_cudagraphs") - cudagraphs with AotAutograd | |
| Inference-only backends: | |
| dynamo.optimize("ofi") - Uses TorchScript optimize_for_inference | |
| dynamo.optimize("fx2trt") - Uses Nvidia TensorRT for inference optimizations | |
| dynamo.optimize("onnxrt") - Uses ONNXRT for inference on CPU/GPU | |
| """ | |