NeMo_Canary / tests /lightning /pytorch /callbacks /test_nsys.py

Upload folder using huggingface_hub

b386992 verified 7 months ago

8.3 kB

	# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	from unittest.mock import MagicMock, patch

	import pytest
	import torch
	from nemo.lightning.pytorch.callbacks.nsys import NsysCallback


	class TestNsysCallback:
	    """Unit tests for ``NsysCallback`` that run with CUDA, cudart, NVTX and rank lookup mocked out."""

	    @pytest.fixture(autouse=True)
	    def setup_mocks(self):
	        """Patch the CUDA / profiler entry points around every test in this class.

	        While a test runs, ``torch.cuda`` is a mock whose ``cudart()`` call returns
	        ``self.fixed_cudart``, so tests can assert on ``cudaProfilerStart`` /
	        ``cudaProfilerStop`` through that single object.
	        """
	        self.cuda_mock = patch('torch.cuda')
	        self.cudart_mock = patch('torch.cuda.cudart')
	        self.emit_nvtx_mock = patch('torch.autograd.profiler.emit_nvtx')
	        self.get_rank_mock = patch('nemo.lightning.pytorch.callbacks.nsys.get_rank')

	        # Entering the patchers through one ``with`` statement undoes them in reverse
	        # order and also cleans up the ones already applied if setup fails before ``yield``.
	        with self.cuda_mock, self.cudart_mock, self.emit_nvtx_mock, self.get_rank_mock:
	            # Mock CUDA availability
	            torch.cuda.is_available = MagicMock(return_value=True)
	            torch.cuda.current_device = MagicMock(return_value=0)

	            # Set up fixed cudart mock for all tests
	            self.fixed_cudart = MagicMock()
	            torch.cuda.cudart = MagicMock(return_value=self.fixed_cudart)

	            yield

	    @pytest.fixture
	    def mock_trainer(self):
	        """Return a mock trainer whose ``strategy.root_device.type`` is ``'cuda'``."""
	        trainer = MagicMock()
	        trainer.strategy.root_device.type = 'cuda'
	        return trainer

	    @pytest.fixture
	    def mock_pl_module(self):
	        """Return a mock standing in for the Lightning module argument."""
	        return MagicMock()

	    def test_init_valid_params(self):
	        """Test initialization with valid parameters."""
	        callback = NsysCallback(start_step=10, end_step=20, ranks=[0, 1], gen_shape=True)
	        assert callback._nsys_profile_start_step == 10
	        assert callback._nsys_profile_end_step == 20
	        assert callback._nsys_profile_ranks == [0, 1]
	        assert callback._nsys_profile_gen_shape is True

	    def test_init_invalid_params(self):
	        """Test initialization with invalid parameters."""
	        # Non-integer start step.
	        with pytest.raises(AssertionError):
	            NsysCallback(start_step='10', end_step=20)

	        # Non-integer end step.
	        with pytest.raises(AssertionError):
	            NsysCallback(start_step=10, end_step='20')

	        # End step earlier than start step.
	        with pytest.raises(AssertionError):
	            NsysCallback(start_step=20, end_step=10)

	    @patch('nemo.lightning.pytorch.callbacks.nsys.get_rank')
	    @patch('torch.autograd.profiler.emit_nvtx')
	    def test_on_train_batch_start_profiling(self, mock_emit_nvtx, mock_get_rank, mock_trainer, mock_pl_module):
	        """Batch start at ``start_step`` on a profiled rank starts the profiler and NVTX with shapes."""
	        mock_get_rank.return_value = 0
	        callback = NsysCallback(start_step=10, end_step=20, ranks=[0], gen_shape=True)

	        mock_trainer.strategy.current_epoch_step = 10
	        callback.on_train_batch_start(mock_trainer, mock_pl_module, None, 10)

	        self.fixed_cudart.cudaProfilerStart.assert_called_once()
	        mock_emit_nvtx.assert_called_once_with(record_shapes=True)

	    @patch('nemo.lightning.pytorch.callbacks.nsys.get_rank')
	    def test_on_train_batch_start_no_profiling(self, mock_get_rank, mock_trainer, mock_pl_module):
	        """Batch start one step before ``start_step`` must not start the profiler."""
	        mock_get_rank.return_value = 0
	        callback = NsysCallback(start_step=10, end_step=20, ranks=[0])

	        mock_trainer.strategy.current_epoch_step = 9
	        callback.on_train_batch_start(mock_trainer, mock_pl_module, None, 9)

	        self.fixed_cudart.cudaProfilerStart.assert_not_called()

	    @patch('nemo.lightning.pytorch.callbacks.nsys.get_rank')
	    @patch('torch.autograd.profiler.emit_nvtx')
	    def test_on_train_batch_end_profiling(self, mock_emit_nvtx, mock_get_rank, mock_trainer, mock_pl_module):
	        """Batch end at ``end_step`` with profiling flagged as enabled stops the profiler once."""
	        mock_get_rank.return_value = 0
	        callback = NsysCallback(start_step=10, end_step=20, ranks=[0])

	        mock_trainer.strategy.current_epoch_step = 20
	        assert callback._has_nsys_enabled is False
	        # Flip the flag by hand so this test does not depend on on_train_batch_start.
	        callback._has_nsys_enabled = True
	        callback.on_train_batch_end(mock_trainer, mock_pl_module, None, None, 20)

	        self.fixed_cudart.cudaProfilerStop.assert_called_once()

	    @patch('nemo.lightning.pytorch.callbacks.nsys.get_rank')
	    @patch('torch.autograd.profiler.emit_nvtx')
	    def test_on_train_batch_end_no_profiling(self, mock_emit_nvtx, mock_get_rank, mock_trainer, mock_pl_module):
	        """Batch end on a freshly constructed callback must not stop the profiler."""
	        mock_get_rank.return_value = 0
	        callback = NsysCallback(start_step=10, end_step=20, ranks=[0])

	        callback.on_train_batch_end(mock_trainer, mock_pl_module, None, None, 19)

	        self.fixed_cudart.cudaProfilerStop.assert_not_called()

	    def test_non_cuda_device(self, mock_trainer, mock_pl_module):
	        """Test behavior when the device is not CUDA."""
	        mock_trainer.strategy.root_device.type = 'cpu'
	        callback = NsysCallback(start_step=10, end_step=20, ranks=[0])

	        # Drive the hooks at the configured boundary steps, as the CUDA tests do.
	        mock_trainer.strategy.current_epoch_step = 10
	        callback.on_train_batch_start(mock_trainer, mock_pl_module, None, 10)
	        mock_trainer.strategy.current_epoch_step = 20
	        callback.on_train_batch_end(mock_trainer, mock_pl_module, None, None, 20)

	        # No exceptions should be raised, and no profiling calls should be made
	        self.fixed_cudart.cudaProfilerStart.assert_not_called()
	        self.fixed_cudart.cudaProfilerStop.assert_not_called()

	    @patch('nemo.lightning.pytorch.callbacks.nsys.get_rank')
	    def test_rank_not_in_profile_ranks(self, mock_get_rank, mock_trainer, mock_pl_module):
	        """Test behavior when the current rank is not in the profile ranks."""
	        mock_get_rank.return_value = 1
	        callback = NsysCallback(start_step=10, end_step=20, ranks=[0])

	        # Use the exact boundary steps so a step mismatch cannot be what suppresses profiling.
	        mock_trainer.strategy.current_epoch_step = 10
	        callback.on_train_batch_start(mock_trainer, mock_pl_module, None, 10)
	        mock_trainer.strategy.current_epoch_step = 20
	        callback.on_train_batch_end(mock_trainer, mock_pl_module, None, None, 20)

	        # No profiling calls should be made
	        self.fixed_cudart.cudaProfilerStart.assert_not_called()
	        self.fixed_cudart.cudaProfilerStop.assert_not_called()

	    @pytest.mark.parametrize(
	        "start_step,end_step,batch_idx,expected_call",
	        [
	            (10, 20, 9, False),
	            (10, 20, 10, True),
	            (10, 20, 15, False),
	            (10, 20, 20, False),
	            (10, 20, 21, False),
	        ],
	    )
	    @patch('nemo.lightning.pytorch.callbacks.nsys.get_rank')
	    @patch('torch.autograd.profiler.emit_nvtx')
	    def test_profiling_range(
	        self,
	        mock_emit_nvtx,
	        mock_get_rank,
	        start_step,
	        end_step,
	        batch_idx,
	        expected_call,
	        mock_trainer,
	        mock_pl_module,
	    ):
	        """Only a batch start exactly at ``start_step`` starts the profiler and NVTX."""
	        mock_get_rank.return_value = 0
	        callback = NsysCallback(start_step=start_step, end_step=end_step, ranks=[0])

	        mock_trainer.strategy.current_epoch_step = batch_idx
	        callback.on_train_batch_start(mock_trainer, mock_pl_module, None, batch_idx)

	        if expected_call:
	            self.fixed_cudart.cudaProfilerStart.assert_called_once()
	            mock_emit_nvtx.assert_called_once()
	        else:
	            self.fixed_cudart.cudaProfilerStart.assert_not_called()
	            mock_emit_nvtx.assert_not_called()

	    @patch('nemo.lightning.pytorch.callbacks.nsys.get_rank')
	    def test_single_profile_range(self, mock_get_rank, mock_trainer, mock_pl_module):
	        """Across one start/middle/end sequence the profiler starts once and stops once."""
	        mock_get_rank.return_value = 0
	        callback = NsysCallback(start_step=10, end_step=40, ranks=[0])

	        # Ensure the device type is 'cuda'
	        mock_trainer.strategy.root_device.type = 'cuda'

	        # Start of range
	        mock_trainer.strategy.current_epoch_step = 10
	        callback.on_train_batch_start(mock_trainer, mock_pl_module, None, 10)
	        assert self.fixed_cudart.cudaProfilerStart.call_count == 1, "cudaProfilerStart was not called"

	        # Middle of range
	        mock_trainer.strategy.current_epoch_step = 25
	        callback.on_train_batch_start(mock_trainer, mock_pl_module, None, 25)
	        assert self.fixed_cudart.cudaProfilerStart.call_count == 1, "cudaProfilerStart was called again"

	        # End of range
	        mock_trainer.strategy.current_epoch_step = 40
	        callback.on_train_batch_end(mock_trainer, mock_pl_module, None, None, 40)
	        assert self.fixed_cudart.cudaProfilerStop.call_count == 1, "cudaProfilerStop was not called"