 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import pytest
import torch


@pytest.mark.run_only_on('GPU')
def test_replace_number_add_offset():
    """replace_number_add_offset shifts the layer index embedded in a state-dict key."""
    from nemo.export.utils.lora_converter import replace_number_add_offset

    layer_key = "layers.0.self_attention.lora_kqv_adapter.linear_in.weight"

    # An offset of zero leaves the key untouched.
    assert replace_number_add_offset(layer_key, 0) == layer_key

    # Positive and negative offsets are applied to the embedded layer number.
    for offset, shifted_key in [
        (1, "layers.1.self_attention.lora_kqv_adapter.linear_in.weight"),
        (-1, "layers.-1.self_attention.lora_kqv_adapter.linear_in.weight"),
    ]:
        assert replace_number_add_offset(layer_key, offset) == shifted_key

    # Keys that carry no layer number pass through unchanged.
    no_layer_key = "embedding.word_embeddings.weight"
    assert replace_number_add_offset(no_layer_key, 1) == no_layer_key


@pytest.mark.run_only_on('GPU')
def test_rename_qkv_keys():
    """A fused kqv adapter key expands into separate q/k/v adapter keys, in order."""
    from nemo.export.utils.lora_converter import rename_qkv_keys

    fused_key = "layers.0.self_attention.lora_kqv_adapter.linear_in.weight"
    unfused_keys = rename_qkv_keys(fused_key)

    expected_keys = [
        "layers.0.self_attention.lora_unfused_kqv_adapter.q_adapter.linear_in.weight",
        "layers.0.self_attention.lora_unfused_kqv_adapter.k_adapter.linear_in.weight",
        "layers.0.self_attention.lora_unfused_kqv_adapter.v_adapter.linear_in.weight",
    ]
    assert len(unfused_keys) == 3
    for produced, wanted in zip(unfused_keys, expected_keys):
        assert produced == wanted


@pytest.mark.run_only_on('GPU')
def test_reformat_module_names_to_hf():
    """NeMo adapter names are mapped onto HF-style lora_A/lora_B module names."""
    from nemo.export.utils.lora_converter import reformat_module_names_to_hf

    # One dummy weight per NeMo adapter flavour the converter understands.
    nemo_names = [
        "q_adapter.linear_in.weight",
        "k_adapter.linear_out.weight",
        "v_adapter.linear_in.weight",
        "lora_dense_attention_adapter.linear_out.weight",
        "lora_4htoh_adapter.linear_in.weight",
        "gate_adapter.linear_out.weight",
        "up_adapter.linear_in.weight",
    ]
    tensors = {name: torch.randn(10, 10) for name in nemo_names}

    new_tensors, module_names = reformat_module_names_to_hf(tensors)

    # No entries are dropped by the conversion.
    assert len(new_tensors) == len(tensors)

    # Each NeMo adapter resolves to its HF projection name.
    assert set(module_names) == {"q_proj", "k_proj", "v_proj", "o_proj", "down_proj", "gate_proj", "up_proj"}

    # Spot-check linear_in -> lora_A and linear_out -> lora_B renames.
    for hf_key in (
        "base_model.q_proj.lora_A.weight",
        "base_model.k_proj.lora_B.weight",
        "base_model.v_proj.lora_A.weight",
    ):
        assert hf_key in new_tensors


@pytest.mark.run_only_on('GPU')
def test_convert_lora_weights_to_canonical():
    """Fused QKV and h-to-4h adapter weights are split into canonical per-projection keys."""
    from nemo.export.utils.lora_converter import convert_lora_weights_to_canonical

    # Minimal model config the converter reads; adapter_dim drives the LoRA rank.
    adapter_dim = 16
    config = {
        "hidden_size": 512,
        "num_attention_heads": 8,
        "num_query_groups": 4,
        "peft": {"lora_tuning": {"adapter_dim": adapter_dim}},
    }

    # Fused adapter weights as NeMo stores them (QKV and h-to-4h each in one tensor).
    lora_weights = {
        "layers.0.self_attention.lora_kqv_adapter.linear_in.weight": torch.randn(adapter_dim, 1024),
        "layers.0.self_attention.lora_kqv_adapter.linear_out.weight": torch.randn(1024, adapter_dim),
        "layers.0.lora_hto4h_adapter.linear_in.weight": torch.randn(adapter_dim, 1024),
        "layers.0.lora_hto4h_adapter.linear_out.weight": torch.randn(2048, adapter_dim),
    }

    converted_weights = convert_lora_weights_to_canonical(config, lora_weights)

    expected_unfused_keys = (
        # QKV split into one adapter per projection.
        "layers.0.self_attention.lora_unfused_kqv_adapter.q_adapter.linear_in.weight",
        "layers.0.self_attention.lora_unfused_kqv_adapter.k_adapter.linear_in.weight",
        "layers.0.self_attention.lora_unfused_kqv_adapter.v_adapter.linear_in.weight",
        # h-to-4h split into gate and up adapters.
        "layers.0.lora_unfused_hto4h_adapter.gate_adapter.linear_in.weight",
        "layers.0.lora_unfused_hto4h_adapter.up_adapter.linear_in.weight",
    )
    for key in expected_unfused_keys:
        assert key in converted_weights