# Copyright 2024-present the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This is not a full-on test suite of vision models, since we already run many tests on dummy models with Conv2d
# layers and on stable diffusion models. Instead, this file contains specific tests for bugs that have been found in
# the past.
import gc

import pytest
import torch
from datasets import load_dataset
from safetensors.torch import load_file
from transformers import AutoImageProcessor, AutoModelForImageClassification

from peft import LoHaConfig, LoKrConfig, LoraConfig, OFTConfig, PeftModel, get_peft_model


CONFIGS = {
    "lora": LoraConfig(target_modules=["convolution"], modules_to_save=["classifier", "normalization"]),
    "loha": LoHaConfig(target_modules=["convolution"], modules_to_save=["classifier", "normalization"]),
    "lokr": LoKrConfig(target_modules=["convolution"], modules_to_save=["classifier", "normalization"]),
    "oft": OFTConfig(target_modules=["convolution"], modules_to_save=["classifier", "normalization"]),
    # TODO: cannot use BOFT because some convolutional kernel dimensions are even (64) and others odd (147). There is
    # no common divisor for the boft_block_size except 1, but using 1 results in an error in the fbd_cuda kernel:
    # > Error in forward_fast_block_diag_cuda_kernel: an illegal memory access was encountered
    # "boft": BOFTConfig(target_modules=["convolution"], modules_to_save=["classifier", "normalization"], boft_block_size=2),
}
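
# Side note on the TODO above (an illustrative observation, not part of the original test): boft_block_size must
# evenly divide the relevant dimension of every targeted layer, and since 64 == 2**6 while 147 == 3 * 7**2, we have
# math.gcd(64, 147) == 1, so no block size greater than 1 can fit all targeted convolution layers at once.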


class TestResnet:
    model_id = "microsoft/resnet-18"

    @pytest.fixture(autouse=True)
    def teardown(self):
        r"""
        Efficient mechanism to free GPU memory after each test. Based on
        https://github.com/huggingface/transformers/issues/21094
        """
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()

    @pytest.fixture(scope="class")
    def image_processor(self):
        image_processor = AutoImageProcessor.from_pretrained(self.model_id)
        return image_processor

    @pytest.fixture(scope="class")
    def data(self, image_processor):
        dataset = load_dataset("huggingface/cats-image", trust_remote_code=True)
        image = dataset["test"]["image"][0]
        return image_processor(image, return_tensors="pt")

    @pytest.mark.parametrize("config", CONFIGS.values(), ids=CONFIGS.keys())
    def test_model_with_batchnorm_reproducibility(self, config, tmp_path, data):
        # see #1732
        torch.manual_seed(0)
        model = AutoModelForImageClassification.from_pretrained(self.model_id)
        model = get_peft_model(model, config)

        # record outputs before training
        model.eval()
        with torch.inference_mode():
            output_before = model(**data)
        model.train()

        # train the model
        optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
        batch_size = 4
        max_steps = 5 * batch_size
        # one-hot target over the 1000 ImageNet classes
        labels = torch.zeros(1, 1000)
        labels[0, 283] = 1
        for i in range(0, max_steps, batch_size):
            optimizer.zero_grad()
            outputs = model(**data, labels=labels)
            loss = outputs.loss
            loss.backward()
            optimizer.step()

        # record outputs after training
        model.eval()
        with torch.inference_mode():
            output_after = model(**data)
        assert torch.isfinite(output_after.logits).all()

        atol, rtol = 1e-4, 1e-4
        # sanity check: model was updated
        assert not torch.allclose(output_before.logits, output_after.logits, atol=atol, rtol=rtol)

        # check saving the model and loading it
        model.save_pretrained(tmp_path)
        del model

        torch.manual_seed(0)
        model = AutoModelForImageClassification.from_pretrained(self.model_id)
        model = PeftModel.from_pretrained(model, tmp_path).eval()
        with torch.inference_mode():
            output_loaded = model(**data)
        assert torch.allclose(output_after.logits, output_loaded.logits, atol=atol, rtol=rtol)

        # ensure that the checkpoint file contains the BatchNorm buffers
        model_running_mean = len([k for k in model.state_dict().keys() if "running_mean" in k])
        state_dict = load_file(tmp_path / "adapter_model.safetensors")
        checkpoint_running_mean = len([k for k in state_dict.keys() if "running_mean" in k])
        # note that the model has twice as many "running_mean" entries as the checkpoint, since there is one copy per
        # ModulesToSaveWrapper; hence the checkpoint count needs to be multiplied by 2 to match
        assert model_running_mean == checkpoint_running_mean * 2
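
# Illustration of the factor of 2 above (a sketch of the expected key layout based on how PEFT's ModulesToSaveWrapper
# works; the exact key names shown are indicative): each module listed in modules_to_save is wrapped so that the
# state dict holds both the frozen original and the trainable copy, e.g.
#   ...normalization.original_module.running_mean
#   ...normalization.modules_to_save.default.running_mean
# while adapter_model.safetensors stores only the trainable copy, so the model has twice as many "running_mean" keys
# as the checkpoint.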