NeMo_Canary / tests / export / test_tensorrt_lazy_compiler.py

Upload folder using huggingface_hub

b386992 verified 6 months ago

5.99 kB

	# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.


	import os
	import tempfile
	import unittest
	from unittest.mock import MagicMock, patch

	import pytest
	import torch.nn as nn


	class SimpleModel(nn.Module):
	    """Small convolution + ReLU network used as the module under compilation.

	    This is a plain helper, not a test case, so it carries no pytest marks:
	    marks only influence collected test items and would be inert here.
	    """

	    def __init__(self):
	        """Create the single 3 -> 64 channel convolution and its activation."""
	        super().__init__()
	        self.conv = nn.Conv2d(3, 64, kernel_size=3, padding=1)
	        self.relu = nn.ReLU()

	    def forward(self, x):
	        """Return ``relu(conv(x))`` for the input tensor ``x``."""
	        return self.relu(self.conv(x))


	@pytest.mark.run_only_on('GPU')
	class TestTensorRTLazyCompiler(unittest.TestCase):
	    """Unit tests for the helpers in ``nemo.export.tensorrt_lazy_compiler``.

	    The class-level ``run_only_on('GPU')`` mark applies to every test method,
	    so the individual methods are not marked again. The module under test is
	    imported inside each test rather than at file import time.
	    """

	    def setUp(self):
	        """Create a fresh model and a per-test temporary directory for the plan file."""
	        # Register the cleanup before doing anything else so the directory is
	        # removed even if a later setUp step raises, and regardless of what
	        # files a test leaves behind inside it.
	        tmp = tempfile.TemporaryDirectory()
	        self.addCleanup(tmp.cleanup)
	        self.temp_dir = tmp.name

	        self.model = SimpleModel()
	        self.plan_path = os.path.join(self.temp_dir, "test_model.plan")

	    def test_get_profile_shapes(self):
	        """get_profile_shapes substitutes the batch dimension with min/opt/max values."""
	        from nemo.export.tensorrt_lazy_compiler import get_profile_shapes

	        input_shape = [1, 3, 224, 224]
	        dynamic_batchsize = [1, 4, 8]

	        min_shape, opt_shape, max_shape = get_profile_shapes(input_shape, dynamic_batchsize)

	        self.assertEqual(min_shape, [1, 3, 224, 224])
	        self.assertEqual(opt_shape, [4, 3, 224, 224])
	        self.assertEqual(max_shape, [8, 3, 224, 224])

	        # Test with None dynamic_batchsize: all three shapes equal the input shape.
	        min_shape, opt_shape, max_shape = get_profile_shapes(input_shape, None)
	        self.assertEqual(min_shape, input_shape)
	        self.assertEqual(opt_shape, input_shape)
	        self.assertEqual(max_shape, input_shape)

	    def test_get_dynamic_axes(self):
	        """get_dynamic_axes reports the axes whose size differs across a profile."""
	        from nemo.export.tensorrt_lazy_compiler import get_dynamic_axes

	        profiles = [{"input": [[1, 3, 224, 224], [4, 3, 224, 224], [8, 3, 224, 224]]}]

	        dynamic_axes = get_dynamic_axes(profiles)
	        self.assertEqual(dynamic_axes, {"input": [0]})

	        # Test with empty profiles
	        dynamic_axes = get_dynamic_axes([])
	        self.assertEqual(dynamic_axes, {})

	    @patch('nemo.export.tensorrt_lazy_compiler.trt_imported', True)
	    @patch('nemo.export.tensorrt_lazy_compiler.polygraphy_imported', True)
	    @patch('torch.cuda.is_available', return_value=True)
	    def test_trt_compile_basic(self, mock_cuda_available):
	        """With TensorRT, Polygraphy and CUDA reported available, trt_compile attaches a compiler."""
	        from nemo.export.tensorrt_lazy_compiler import trt_compile

	        # Test basic compilation
	        compiled_model = trt_compile(
	            self.model,
	            self.plan_path,
	            args={"method": "onnx", "precision": "fp16", "build_args": {"builder_optimization_level": 5}},
	        )

	        # The very same module object is expected back, now carrying the compiler.
	        self.assertIs(compiled_model, self.model)
	        self.assertTrue(hasattr(compiled_model, '_trt_compiler'))

	    @patch('nemo.export.tensorrt_lazy_compiler.trt_imported', False)
	    def test_trt_compile_no_tensorrt(self):
	        """With TensorRT reported unavailable, trt_compile returns the model untouched."""
	        from nemo.export.tensorrt_lazy_compiler import trt_compile

	        # Test when TensorRT is not available
	        compiled_model = trt_compile(self.model, self.plan_path)
	        self.assertIs(compiled_model, self.model)
	        self.assertFalse(hasattr(compiled_model, '_trt_compiler'))

	    def test_trt_compiler_initialization(self):
	        """TrtCompiler stores the constructor arguments on matching attributes."""
	        from nemo.export.tensorrt_lazy_compiler import TrtCompiler

	        compiler = TrtCompiler(
	            self.model,
	            self.plan_path,
	            precision="fp16",
	            method="onnx",
	            input_names=["x"],
	            output_names=["output"],
	            logger=MagicMock(),
	        )

	        self.assertEqual(compiler.plan_path, self.plan_path)
	        self.assertEqual(compiler.precision, "fp16")
	        self.assertEqual(compiler.method, "onnx")
	        self.assertEqual(compiler.input_names, ["x"])
	        self.assertEqual(compiler.output_names, ["output"])

	    def test_trt_compiler_invalid_precision(self):
	        """An unknown precision string is rejected with ValueError."""
	        from nemo.export.tensorrt_lazy_compiler import TrtCompiler

	        with self.assertRaises(ValueError):
	            TrtCompiler(self.model, self.plan_path, precision="invalid_precision")

	    def test_trt_compiler_invalid_method(self):
	        """An unknown export method string is rejected with ValueError."""
	        from nemo.export.tensorrt_lazy_compiler import TrtCompiler

	        with self.assertRaises(ValueError):
	            TrtCompiler(self.model, self.plan_path, method="invalid_method")

	    @patch('nemo.export.tensorrt_lazy_compiler.trt_imported', True)
	    @patch('nemo.export.tensorrt_lazy_compiler.polygraphy_imported', True)
	    @patch('torch.cuda.is_available', return_value=True)
	    def test_trt_compile_with_submodule(self, mock_cuda_available):
	        """When a submodule name is given, the compiler is attached to that submodule."""
	        from nemo.export.tensorrt_lazy_compiler import trt_compile

	        class NestedModel(nn.Module):
	            """Wrapper whose only child is the ``SimpleModel`` to be compiled."""

	            def __init__(self):
	                super().__init__()
	                self.submodule = SimpleModel()

	        model = NestedModel()
	        compiled_model = trt_compile(model, self.plan_path, submodule=["submodule"])

	        self.assertIs(compiled_model, model)
	        self.assertTrue(hasattr(model.submodule, '_trt_compiler'))


	if __name__ == "__main__":
	    # Running the file directly hands control to the stdlib unittest runner.
	    unittest.main()