# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import pytest
import torch


@pytest.mark.run_only_on('GPU')
def test_replace_number_add_offset():
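    """replace_number_add_offset shifts the layer index embedded in a state-dict
    key by the given offset and leaves keys without a layer number unchanged."""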
    from nemo.export.utils.lora_converter import replace_number_add_offset

    # Test with no offset
    key = "layers.0.self_attention.lora_kqv_adapter.linear_in.weight"
    assert replace_number_add_offset(key, 0) == key
    # Test with a positive offset
    assert replace_number_add_offset(key, 1) == "layers.1.self_attention.lora_kqv_adapter.linear_in.weight"
    # Test with a negative offset
    assert replace_number_add_offset(key, -1) == "layers.-1.self_attention.lora_kqv_adapter.linear_in.weight"
    # Test with a key that doesn't contain a layer number
    key = "embedding.word_embeddings.weight"
    assert replace_number_add_offset(key, 1) == key


@pytest.mark.run_only_on('GPU')
def test_rename_qkv_keys():
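    """rename_qkv_keys expands one fused KQV adapter key into the three
    unfused q/k/v adapter keys used by the canonical LoRA layout."""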
    from nemo.export.utils.lora_converter import rename_qkv_keys

    key = "layers.0.self_attention.lora_kqv_adapter.linear_in.weight"
    new_keys = rename_qkv_keys(key)
    assert len(new_keys) == 3
    assert new_keys[0] == "layers.0.self_attention.lora_unfused_kqv_adapter.q_adapter.linear_in.weight"
    assert new_keys[1] == "layers.0.self_attention.lora_unfused_kqv_adapter.k_adapter.linear_in.weight"
    assert new_keys[2] == "layers.0.self_attention.lora_unfused_kqv_adapter.v_adapter.linear_in.weight"


@pytest.mark.run_only_on('GPU')
def test_reformat_module_names_to_hf():
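    """reformat_module_names_to_hf renames NeMo adapter keys to the HF PEFT
    convention: each adapter maps to a target module (q_proj, k_proj, ...),
    linear_in becomes lora_A, linear_out becomes lora_B, and the converted
    tensors are returned together with the list of target module names."""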
    from nemo.export.utils.lora_converter import reformat_module_names_to_hf

    # Create sample tensors with NeMo-style adapter names
    tensors = {
        "q_adapter.linear_in.weight": torch.randn(10, 10),
        "k_adapter.linear_out.weight": torch.randn(10, 10),
        "v_adapter.linear_in.weight": torch.randn(10, 10),
        "lora_dense_attention_adapter.linear_out.weight": torch.randn(10, 10),
        "lora_4htoh_adapter.linear_in.weight": torch.randn(10, 10),
        "gate_adapter.linear_out.weight": torch.randn(10, 10),
        "up_adapter.linear_in.weight": torch.randn(10, 10),
    }
    new_tensors, module_names = reformat_module_names_to_hf(tensors)
    # Every input tensor should be converted
    assert len(new_tensors) == len(tensors)
    # The HF target modules should be identified correctly
    expected_modules = ["q_proj", "k_proj", "v_proj", "o_proj", "down_proj", "gate_proj", "up_proj"]
    assert set(module_names) == set(expected_modules)
    # Spot-check specific conversions: linear_in maps to lora_A, linear_out to lora_B
    assert "base_model.q_proj.lora_A.weight" in new_tensors
    assert "base_model.k_proj.lora_B.weight" in new_tensors
    assert "base_model.v_proj.lora_A.weight" in new_tensors


@pytest.mark.run_only_on('GPU')
def test_convert_lora_weights_to_canonical():
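    """convert_lora_weights_to_canonical splits fused QKV and fused h-to-4h
    LoRA adapters into canonical per-projection (q/k/v, gate/up) adapters."""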
    from nemo.export.utils.lora_converter import convert_lora_weights_to_canonical

    # Create a sample config
    config = {
        "hidden_size": 512,
        "num_attention_heads": 8,
        "num_query_groups": 4,
        "peft": {"lora_tuning": {"adapter_dim": 16}},
    }
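    # With this config, head_dim = 512 / 8 = 64; assuming NeMo's fused [Q, K, V]
    # layout with grouped KV heads, the fused projection has
    # 512 (Q) + 4 * 64 (K) + 4 * 64 (V) = 1024 output rows, matching the
    # (1024, 16) kqv linear_out below. adapter_dim 16 is the LoRA rank.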
    # Create sample fused QKV and fused h-to-4h LoRA weights
    lora_weights = {
        "layers.0.self_attention.lora_kqv_adapter.linear_in.weight": torch.randn(16, 1024),
        "layers.0.self_attention.lora_kqv_adapter.linear_out.weight": torch.randn(1024, 16),
        "layers.0.lora_hto4h_adapter.linear_in.weight": torch.randn(16, 1024),
        "layers.0.lora_hto4h_adapter.linear_out.weight": torch.randn(2048, 16),
    }
    converted_weights = convert_lora_weights_to_canonical(config, lora_weights)
    # The fused QKV adapter should be split into q/k/v adapters
    assert "layers.0.self_attention.lora_unfused_kqv_adapter.q_adapter.linear_in.weight" in converted_weights
    assert "layers.0.self_attention.lora_unfused_kqv_adapter.k_adapter.linear_in.weight" in converted_weights
    assert "layers.0.self_attention.lora_unfused_kqv_adapter.v_adapter.linear_in.weight" in converted_weights
    # The fused h-to-4h adapter should be split into gate and up adapters
    assert "layers.0.lora_unfused_hto4h_adapter.gate_adapter.linear_in.weight" in converted_weights
    assert "layers.0.lora_unfused_hto4h_adapter.up_adapter.linear_in.weight" in converted_weights