# Source: NeMo_Canary / tests / export / test_tensorrt_lazy_compiler.py
# Uploaded by Respair: "Upload folder using huggingface_hub"
# Commit b386992 (verified)
#
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import tempfile
import unittest
from unittest.mock import MagicMock, patch
import pytest
import torch.nn as nn
class SimpleModel(nn.Module):
    """Minimal convolution + ReLU network used as the model under test.

    This is a helper module, not a test case, so it carries no pytest
    markers: markers only have an effect on collected test items.
    """

    def __init__(self):
        """Create a 3 -> 64 channel 3x3 convolution (padding 1) and a ReLU."""
        super().__init__()
        self.conv = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return ``relu(conv(x))`` for the input tensor ``x``."""
        return self.relu(self.conv(x))
@pytest.mark.run_only_on('GPU')
class TestTensorRTLazyCompiler(unittest.TestCase):
    """Tests for ``nemo.export.tensorrt_lazy_compiler``.

    The class-level ``run_only_on('GPU')`` marker is inherited by every test
    method in the class, so the methods are not marked individually.
    Each test imports the names it needs from
    ``nemo.export.tensorrt_lazy_compiler`` inside its own body.
    """

    def setUp(self):
        """Create the model under test and a scratch directory for the plan file."""
        self.model = SimpleModel()
        # TemporaryDirectory removes the directory recursively, so cleanup
        # succeeds even if a test leaves files other than the plan file behind.
        # addCleanup also runs when setUp itself fails after this point.
        scratch_dir = tempfile.TemporaryDirectory()
        self.addCleanup(scratch_dir.cleanup)
        self.temp_dir = scratch_dir.name
        self.plan_path = os.path.join(self.temp_dir, "test_model.plan")

    def test_get_profile_shapes(self):
        """get_profile_shapes swaps the batch dimension for the min/opt/max batch sizes."""
        from nemo.export.tensorrt_lazy_compiler import get_profile_shapes

        input_shape = [1, 3, 224, 224]
        dynamic_batchsize = [1, 4, 8]

        min_shape, opt_shape, max_shape = get_profile_shapes(input_shape, dynamic_batchsize)
        self.assertEqual(min_shape, [1, 3, 224, 224])
        self.assertEqual(opt_shape, [4, 3, 224, 224])
        self.assertEqual(max_shape, [8, 3, 224, 224])

        # Test with None dynamic_batchsize: all three shapes equal the input shape.
        min_shape, opt_shape, max_shape = get_profile_shapes(input_shape, None)
        self.assertEqual(min_shape, input_shape)
        self.assertEqual(opt_shape, input_shape)
        self.assertEqual(max_shape, input_shape)

    def test_get_dynamic_axes(self):
        """get_dynamic_axes reports the axes whose size differs across a profile."""
        from nemo.export.tensorrt_lazy_compiler import get_dynamic_axes

        profiles = [{"input": [[1, 3, 224, 224], [4, 3, 224, 224], [8, 3, 224, 224]]}]
        dynamic_axes = get_dynamic_axes(profiles)
        self.assertEqual(dynamic_axes, {"input": [0]})

        # Test with empty profiles
        dynamic_axes = get_dynamic_axes([])
        self.assertEqual(dynamic_axes, {})

    @patch('nemo.export.tensorrt_lazy_compiler.trt_imported', True)
    @patch('nemo.export.tensorrt_lazy_compiler.polygraphy_imported', True)
    @patch('torch.cuda.is_available', return_value=True)
    def test_trt_compile_basic(self, mock_cuda_available):
        """trt_compile returns the same model object with a ``_trt_compiler`` attached."""
        from nemo.export.tensorrt_lazy_compiler import trt_compile

        # Test basic compilation
        compiled_model = trt_compile(
            self.model,
            self.plan_path,
            args={"method": "onnx", "precision": "fp16", "build_args": {"builder_optimization_level": 5}},
        )

        # The model is expected back as the very same object, not a copy.
        self.assertIs(compiled_model, self.model)
        self.assertTrue(hasattr(compiled_model, '_trt_compiler'))

    @patch('nemo.export.tensorrt_lazy_compiler.trt_imported', False)
    def test_trt_compile_no_tensorrt(self):
        """With ``trt_imported`` patched to False, trt_compile leaves the model untouched."""
        from nemo.export.tensorrt_lazy_compiler import trt_compile

        # Test when TensorRT is not available
        compiled_model = trt_compile(self.model, self.plan_path)

        self.assertIs(compiled_model, self.model)
        self.assertFalse(hasattr(compiled_model, '_trt_compiler'))

    def test_trt_compiler_initialization(self):
        """TrtCompiler stores the constructor arguments on the instance."""
        from nemo.export.tensorrt_lazy_compiler import TrtCompiler

        compiler = TrtCompiler(
            self.model,
            self.plan_path,
            precision="fp16",
            method="onnx",
            input_names=["x"],
            output_names=["output"],
            logger=MagicMock(),
        )

        self.assertEqual(compiler.plan_path, self.plan_path)
        self.assertEqual(compiler.precision, "fp16")
        self.assertEqual(compiler.method, "onnx")
        self.assertEqual(compiler.input_names, ["x"])
        self.assertEqual(compiler.output_names, ["output"])

    def test_trt_compiler_invalid_precision(self):
        """An unknown ``precision`` value raises ValueError."""
        from nemo.export.tensorrt_lazy_compiler import TrtCompiler

        with self.assertRaises(ValueError):
            TrtCompiler(self.model, self.plan_path, precision="invalid_precision")

    def test_trt_compiler_invalid_method(self):
        """An unknown ``method`` value raises ValueError."""
        from nemo.export.tensorrt_lazy_compiler import TrtCompiler

        with self.assertRaises(ValueError):
            TrtCompiler(self.model, self.plan_path, method="invalid_method")

    @patch('nemo.export.tensorrt_lazy_compiler.trt_imported', True)
    @patch('nemo.export.tensorrt_lazy_compiler.polygraphy_imported', True)
    @patch('torch.cuda.is_available', return_value=True)
    def test_trt_compile_with_submodule(self, mock_cuda_available):
        """With ``submodule`` given, the compiler is attached to the named child module."""
        from nemo.export.tensorrt_lazy_compiler import trt_compile

        class NestedModel(nn.Module):
            """Wrapper exposing a SimpleModel as the ``submodule`` attribute."""

            def __init__(self):
                super().__init__()
                self.submodule = SimpleModel()

        model = NestedModel()
        compiled_model = trt_compile(model, self.plan_path, submodule=["submodule"])

        self.assertIs(compiled_model, model)
        self.assertTrue(hasattr(model.submodule, '_trt_compiler'))
# Allow running this module directly with the standard-library unittest runner.
if __name__ == "__main__":
    unittest.main()