| |
| |
| |
| |
| |
|
|
| """Test render speed.""" |
| import logging |
| import sys |
| from os import path |
|
|
| import torch |
| from fvcore.common.benchmark import benchmark |
| from pytorch3d.renderer.points.pulsar import Renderer |
| from torch.autograd import Variable |
|
|
|
|
| |
# Put the directory one level above this file's directory at the front of the
# import search path, so it is searched first when resolving imports.
sys.path.insert(0, path.join(path.dirname(__file__), ".."))
# Module-level logger named after this module.
LOGGER = logging.getLogger(__name__)
|
|
|
|
| """Measure the execution speed of the rendering. |
| |
| This measures a very pessimistic lower bound on speed, because synchronization |
| points have to be introduced in Python. On a pure PyTorch execution pipeline, |
| results should be significantly faster. You can get pure CUDA timings through |
| C++ by activating `PULSAR_TIMINGS_BATCHED_ENABLED` in the file |
| `pytorch3d/csrc/pulsar/logging.h` or defining it for your compiler. |
| """ |
|
|
|
|
def _bm_pulsar():
    """Set up the pulsar forward-pass benchmark and return its closure.

    Builds a ``Renderer`` for a 1000x1000 image and 1,000,000 points, creates
    a seeded random scene (positions, colors, radii) on the CPU and moves the
    scene and the renderer to the CUDA device. All of this setup runs once,
    outside of the returned closure.

    Returns:
        A zero-argument callable that performs one ``renderer.forward`` call
        followed by ``torch.cuda.synchronize()``.
    """
    n_points = 1_000_000
    width = 1_000
    height = 1_000
    renderer = Renderer(width, height, n_points)

    # Fixed seed: every benchmark run renders the same random scene. The
    # random data is generated on the CPU first and only then moved over.
    torch.manual_seed(1)
    # Columns 0 and 1 end up in [-5, 5), column 2 in [25, 35).
    vert_pos = torch.rand(n_points, 3, dtype=torch.float32) * 10.0
    vert_pos[:, 2] += 25.0
    vert_pos[:, :2] -= 5.0
    vert_col = torch.rand(n_points, 3, dtype=torch.float32)
    vert_rad = torch.rand(n_points, dtype=torch.float32)
    # Eight camera parameters; their layout is defined by the pulsar renderer
    # and is not visible from this file.
    cam_params = torch.tensor(
        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 2.0], dtype=torch.float32
    )

    device = torch.device("cuda")
    # Plain tensors are passed to the renderer directly: the `Variable`
    # wrapper is deprecated, and these tensors do not require gradients.
    vert_pos, vert_col, vert_rad, cam_params = (
        tensor.to(device) for tensor in (vert_pos, vert_col, vert_rad, cam_params)
    )
    renderer = renderer.to(device)

    def bm_closure():
        renderer.forward(
            vert_pos,
            vert_col,
            vert_rad,
            cam_params,
            # NOTE(review): presumably gamma and max_depth per the pulsar
            # `Renderer.forward` signature - confirm against its docs.
            1.0e-1,
            45.0,
            percent_allowed_difference=0.01,
        )
        # Block until the GPU has finished, so the measured time covers the
        # whole render and not just the kernel launch.
        torch.cuda.synchronize()

    return bm_closure
|
|
|
|
def _bm_pulsar_backward():
    """Set up the pulsar backward-pass benchmark and return its closure.

    Builds the same seeded random scene as the forward benchmark (1,000,000
    points, 1000x1000 image), marks all four inputs as requiring gradients,
    runs a single forward pass on the CUDA device and sums the result into a
    scalar loss. Only the backward pass is part of the returned closure.

    Returns:
        A zero-argument callable that performs one ``loss.backward`` call
        followed by ``torch.cuda.synchronize()``.
    """
    n_points = 1_000_000
    width = 1_000
    height = 1_000
    renderer = Renderer(width, height, n_points)

    # Fixed seed: every benchmark run differentiates the same random scene.
    torch.manual_seed(1)
    # Columns 0 and 1 end up in [-5, 5), column 2 in [25, 35).
    vert_pos = torch.rand(n_points, 3, dtype=torch.float32) * 10.0
    vert_pos[:, 2] += 25.0
    vert_pos[:, :2] -= 5.0
    vert_col = torch.rand(n_points, 3, dtype=torch.float32)
    vert_rad = torch.rand(n_points, dtype=torch.float32)
    # Eight camera parameters; their layout is defined by the pulsar renderer
    # and is not visible from this file.
    cam_params = torch.tensor(
        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 2.0], dtype=torch.float32
    )

    device = torch.device("cuda")
    # Enable gradients on the device tensors directly instead of going through
    # the deprecated `Variable` wrapper. This happens after the in-place
    # edits above, so the tensors are still ordinary leaves at this point.
    vert_pos, vert_col, vert_rad, cam_params = (
        tensor.to(device).requires_grad_(True)
        for tensor in (vert_pos, vert_col, vert_rad, cam_params)
    )
    renderer = renderer.to(device)

    # The forward pass runs once during setup and is not timed.
    res = renderer.forward(
        vert_pos,
        vert_col,
        vert_rad,
        cam_params,
        # NOTE(review): presumably gamma and max_depth per the pulsar
        # `Renderer.forward` signature - confirm against its docs.
        1.0e-1,
        45.0,
        percent_allowed_difference=0.01,
    )
    loss = res.sum()

    def bm_closure():
        # The closure is invoked repeatedly on the same graph, so the graph
        # has to be retained between calls.
        loss.backward(retain_graph=True)
        # Block until the GPU has finished, so the measured time covers the
        # whole backward pass and not just the kernel launch.
        torch.cuda.synchronize()

    return bm_closure
|
|
|
|
def bm_pulsar() -> None:
    """Run the pulsar forward and backward rendering benchmarks.

    Both benchmarks need a CUDA device. When none is available, a warning is
    logged and nothing is run, so the skip is visible rather than silent.
    """
    if not torch.cuda.is_available():
        LOGGER.warning("CUDA is not available; skipping the pulsar benchmarks.")
        return

    # Each benchmark gets a single, empty keyword-argument set and three
    # warm-up iterations.
    benchmark(_bm_pulsar, "PULSAR_FORWARD", [{}], warmup_iters=3)
    benchmark(_bm_pulsar_backward, "PULSAR_BACKWARD", [{}], warmup_iters=3)
|
|
|
|
# Run the benchmarks when this file is executed as a script.
if __name__ == "__main__":
    bm_pulsar()
|
|