from time import time

import pytest
import torch

import kornia

# Input tensor shapes to benchmark. Every entry has the same total element
# count (3 * 2**26); only the split between the first two axes differs.
points_shapes = [(64, 1024 ** 2, 3), (8192, 8192, 3), (1024 ** 2, 64, 3)]


# TODO: remove xfail once we have enough gpu bandwidth in the CI
@pytest.mark.xfail(reason='May cause memory issues.')
def test_performance_speed(device, dtype):
    """Print the wall-clock time of ``kornia.geometry.transform_points`` on CUDA.

    For each shape in ``points_shapes`` a random points tensor and a random
    ``(1, 4, 4)`` pose are created, and a single call to ``transform_points``
    is timed between two device synchronisations. Nothing is asserted; the
    timings are only printed.

    Args:
        device: object exposing ``.type``; the test is skipped unless it is
            ``'cuda'`` and CUDA is available.
        dtype: accepted for signature compatibility but not used in the body.
            NOTE(review): confirm whether the tensors should honour it.
    """
    if device.type != 'cuda' or not torch.cuda.is_available():
        pytest.skip("Cuda not available in system,")

    print("Benchmarking project_points")
    for input_shape in points_shapes:
        # Allocate straight on the target device: going through
        # ``torch.rand(...).to(device)`` would first build the whole tensor in
        # host memory and then copy it over, which is needless memory pressure
        # for inputs of this size.
        inpt = torch.rand(input_shape, device=device)
        pose = torch.rand((1, 4, 4), device=device)

        # Drain pending GPU work so tensor creation is not counted in the
        # measurement.
        torch.cuda.synchronize(device)
        t = time()
        kornia.geometry.transform_points(pose, inpt)
        # CUDA kernels are launched asynchronously; wait for completion before
        # reading the clock.
        torch.cuda.synchronize(device)
        print(f"inp={input_shape}, dev={device}, {time() - t}, sec")