|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import pytest |
|
|
import torch |
|
|
|
|
|
|
|
|
@pytest.mark.run_only_on('GPU')
def test_replace_number_add_offset():
    """replace_number_add_offset shifts the layer index embedded in a state-dict key.

    Keys that carry no layer number must pass through unchanged.
    """
    from nemo.export.utils.lora_converter import replace_number_add_offset

    layered_key = "layers.0.self_attention.lora_kqv_adapter.linear_in.weight"

    # A zero offset is the identity.
    assert replace_number_add_offset(layered_key, 0) == layered_key

    # Positive and negative offsets move the layer index by that amount
    # (base layer is 0, so the result index equals the offset).
    for offset in (1, -1):
        shifted = f"layers.{offset}.self_attention.lora_kqv_adapter.linear_in.weight"
        assert replace_number_add_offset(layered_key, offset) == shifted

    # Keys without a layer number are returned untouched regardless of offset.
    layerless_key = "embedding.word_embeddings.weight"
    assert replace_number_add_offset(layerless_key, 1) == layerless_key
|
|
|
|
|
|
|
|
@pytest.mark.run_only_on('GPU')
def test_rename_qkv_keys():
    """rename_qkv_keys expands one fused kqv adapter key into q/k/v adapter keys."""
    from nemo.export.utils.lora_converter import rename_qkv_keys

    fused_key = "layers.0.self_attention.lora_kqv_adapter.linear_in.weight"
    renamed = rename_qkv_keys(fused_key)

    # One unfused key per projection, in q, k, v order.
    expected = [
        f"layers.0.self_attention.lora_unfused_kqv_adapter.{proj}_adapter.linear_in.weight"
        for proj in ("q", "k", "v")
    ]
    assert len(renamed) == 3
    assert list(renamed) == expected
|
|
|
|
|
|
|
|
@pytest.mark.run_only_on('GPU')
def test_reformat_module_names_to_hf():
    """reformat_module_names_to_hf maps NeMo adapter names onto HF PEFT module names."""
    from nemo.export.utils.lora_converter import reformat_module_names_to_hf

    # One dummy tensor per NeMo adapter naming pattern we expect to translate.
    tensors = {
        "q_adapter.linear_in.weight": torch.randn(10, 10),
        "k_adapter.linear_out.weight": torch.randn(10, 10),
        "v_adapter.linear_in.weight": torch.randn(10, 10),
        "lora_dense_attention_adapter.linear_out.weight": torch.randn(10, 10),
        "lora_4htoh_adapter.linear_in.weight": torch.randn(10, 10),
        "gate_adapter.linear_out.weight": torch.randn(10, 10),
        "up_adapter.linear_in.weight": torch.randn(10, 10),
    }

    new_tensors, module_names = reformat_module_names_to_hf(tensors)

    # Renaming must not drop or duplicate tensors.
    assert len(new_tensors) == len(tensors)

    # Every NeMo adapter maps to its HF counterpart.
    assert set(module_names) == {
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "down_proj",
        "gate_proj",
        "up_proj",
    }

    # linear_in -> lora_A and linear_out -> lora_B, under the base_model prefix.
    for hf_key in (
        "base_model.q_proj.lora_A.weight",
        "base_model.k_proj.lora_B.weight",
        "base_model.v_proj.lora_A.weight",
    ):
        assert hf_key in new_tensors
|
|
|
|
|
|
|
|
@pytest.mark.run_only_on('GPU')
def test_convert_lora_weights_to_canonical():
    """convert_lora_weights_to_canonical splits fused kqv / hto4h adapters per projection."""
    from nemo.export.utils.lora_converter import convert_lora_weights_to_canonical

    adapter_dim = 16
    config = {
        "hidden_size": 512,
        "num_attention_heads": 8,
        "num_query_groups": 4,
        "peft": {"lora_tuning": {"adapter_dim": adapter_dim}},
    }

    # Fused attention (kqv) and MLP (hto4h) adapter weights for a single layer.
    lora_weights = {
        "layers.0.self_attention.lora_kqv_adapter.linear_in.weight": torch.randn(adapter_dim, 1024),
        "layers.0.self_attention.lora_kqv_adapter.linear_out.weight": torch.randn(1024, adapter_dim),
        "layers.0.lora_hto4h_adapter.linear_in.weight": torch.randn(adapter_dim, 1024),
        "layers.0.lora_hto4h_adapter.linear_out.weight": torch.randn(2048, adapter_dim),
    }

    converted = convert_lora_weights_to_canonical(config, lora_weights)

    # The fused kqv adapter is unfused into separate q/k/v adapters.
    for proj in ("q", "k", "v"):
        key = f"layers.0.self_attention.lora_unfused_kqv_adapter.{proj}_adapter.linear_in.weight"
        assert key in converted

    # The fused hto4h adapter is unfused into gate/up adapters.
    for half in ("gate", "up"):
        key = f"layers.0.lora_unfused_hto4h_adapter.{half}_adapter.linear_in.weight"
        assert key in converted
|
|
|