|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from __future__ import annotations |
|
|
|
|
|
import tempfile |
|
|
import unittest |
|
|
from typing import List |
|
|
|
|
|
import torch |
|
|
|
|
|
# Single-element argument lists naming a numeric precision.
# NOTE(review): nothing in the visible part of this file consumes these;
# presumably they feed parameterized tests defined elsewhere -- confirm.
TEST_CASE_1: List[str] = ["fp32"]
TEST_CASE_2: List[str] = ["fp16"]
|
|
|
|
|
|
|
|
class ListAdd(torch.nn.Module):
    """Parameter-free module whose inputs and outputs mix lists and tensors.

    It gives the tests a forward signature with a list argument, plain tensor
    arguments, a scalar with a default value, and a nested return structure.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x: List[torch.Tensor], y: torch.Tensor, z: torch.Tensor, bs: float = 0.1):
        """Accumulate every tensor of ``x`` (plus ``bs`` each time) onto ``y``.

        Args:
            x: tensors to add onto ``y``; the list itself is not modified.
            y: starting value of the running sum; cloned, so never modified.
            z: tensor added to ``y`` to form the second list output.
            bs: scalar added once per element of ``x``.

        Returns:
            A 3-tuple ``(x_copy, [acc, z_plus_y], acc + z_plus_y)`` where
            ``x_copy`` is a shallow copy of ``x``, ``acc`` is
            ``y + sum(xi + bs for xi in x)`` and ``z_plus_y`` is ``z + y``.
        """
        z_plus_y = z + y
        x_copy = x.copy()
        acc = y.clone()
        for item in x:
            # Keep the (acc + item) + bs evaluation order of the running sum.
            acc = (acc + item) + bs
        return x_copy, [acc, z_plus_y], acc + z_plus_y
|
|
|
|
|
|
|
|
# NOTE: ``unittest.skip`` must be *called* with a reason when decorating a class.
# A bare ``@unittest.skip`` only special-cases plain functions; applied to a class
# it returns its inner decorator closure, so the class name ends up bound to a
# function and the loader silently finds no tests instead of reporting skips.
@unittest.skip("TRT compile tests are unconditionally skipped; they require a CUDA device")
class TestTRTCompile(unittest.TestCase):
    """Exercise ``trt_compile`` on small CUDA modules.

    Every test follows the same pattern: run the model eagerly, wrap it with
    ``trt_compile``, assert that no engine exists yet, run the model again,
    assert that an engine now exists, and compare both outputs.
    """

    def setUp(self):
        # Remember the active CUDA device so tearDown can restore it.
        self.gpu_device = torch.cuda.current_device()

    def tearDown(self):
        # Restore the device recorded in setUp if a test switched away from it.
        if torch.cuda.current_device() != self.gpu_device:
            torch.cuda.set_device(self.gpu_device)

    def _assert_lazy_build_matches_eager(self, model, input_example, plan_path, args):
        """Wrap ``model`` with ``trt_compile`` and compare it to its eager output.

        Args:
            model: CUDA module to wrap in place.
            input_example: tuple of positional arguments passed to ``model``.
            plan_path: path prefix handed to ``trt_compile`` as its second argument.
            args: dictionary forwarded to ``trt_compile`` as ``args``.
        """
        # Reference output, computed before the model is wrapped.
        output_example = model(*input_example)
        trt_compile(model, plan_path, args=args)
        # The test expects no engine to exist until the first forward call.
        self.assertIsNone(model._trt_compiler.engine)
        trt_output = model(*input_example)
        # The first forward call is expected to have produced an engine.
        self.assertIsNotNone(model._trt_compiler.engine)
        torch.testing.assert_close(trt_output, output_example, rtol=0.01, atol=0.01)

    def test_torch_trt(self):
        """Compile a two-layer PReLU stack with the ``torch_trt`` method."""
        model = torch.nn.Sequential(*[torch.nn.PReLU(), torch.nn.PReLU()])
        # Load fixed PReLU slopes so the reference output uses known weights.
        data1 = model.state_dict()
        data1["0.weight"] = torch.tensor([0.1])
        data1["1.weight"] = torch.tensor([0.2])
        model.load_state_dict(data1)
        model.cuda()
        x = torch.randn(1, 16).to("cuda")

        with tempfile.TemporaryDirectory() as tempdir:
            args = {
                "method": "torch_trt",
                "dynamic_batchsize": [1, 4, 8],
            }
            self._assert_lazy_build_matches_eager(model, (x,), f"{tempdir}/test_lists", args)

    def test_profiles(self):
        """Compile ``ListAdd`` with ``dynamo`` disabled and explicit input profiles."""
        model = ListAdd().cuda()

        with torch.no_grad(), tempfile.TemporaryDirectory() as tmpdir:
            args = {
                "export_args": {
                    "dynamo": False,
                },
                # NOTE(review): each value looks like a [min, opt, max] shape triple,
                # with x_0..x_2 naming the elements of the list input -- confirm
                # against the trt_compile documentation.
                "input_profiles": [
                    {
                        "x_0": [[1, 8], [2, 16], [2, 32]],
                        "x_1": [[1, 8], [2, 16], [2, 32]],
                        "x_2": [[1, 8], [2, 16], [2, 32]],
                        "y": [[1, 8], [2, 16], [2, 32]],
                        "z": [[1, 8], [1, 16], [1, 32]],
                    }
                ],
                # NOTE(review): presumably mirrors ListAdd.forward's return structure
                # (variable-length list, 2-element list, bare tensor) -- confirm.
                "output_lists": [[-1], [2], []],
            }
            x = torch.randn(1, 16).to("cuda")
            y = torch.randn(1, 16).to("cuda")
            z = torch.randn(1, 16).to("cuda")
            input_example = ([x, y, z], y.clone(), z.clone())
            self._assert_lazy_build_matches_eager(model, input_example, f"{tmpdir}/test_dynamo_trt", args)

    def test_lists(self):
        """Compile ``ListAdd`` with ``dynamo`` enabled and list-structured outputs."""
        model = ListAdd().cuda()

        with torch.no_grad(), tempfile.TemporaryDirectory() as tmpdir:
            args = {
                "export_args": {
                    "dynamo": True,
                },
                # NOTE(review): presumably mirrors ListAdd.forward's return structure
                # (variable-length list, 2-element list, bare tensor) -- confirm.
                "output_lists": [[-1], [2], []],
            }
            x = torch.randn(1, 16).to("cuda")
            y = torch.randn(1, 16).to("cuda")
            z = torch.randn(1, 16).to("cuda")
            input_example = ([x, y, z], y.clone(), z.clone())
            self._assert_lazy_build_matches_eager(model, input_example, f"{tmpdir}/test_lists", args)
|
|
|
|
|
|
|
|
# Run the unittest command-line runner when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()
|
|
|