|
|
import transformers |
|
|
import torch |
|
|
import logging |
|
|
|
|
|
|
|
|
def maybe_zero_3(param, ignore_status=False, name=None):
    """Return a detached CPU clone of ``param``, gathering it first if needed.

    Objects exposing a ``ds_id`` attribute are read inside
    ``deepspeed.zero.GatheredParameters``; anything else is detached, moved
    to CPU and cloned directly.

    Args:
        param: Tensor-like object supporting ``detach().cpu().clone()``.
        ignore_status: When false, a warning is logged if ``param.ds_status``
            equals ``ZeroParamStatus.NOT_AVAILABLE``.
        name: Label used as the prefix of that warning.

    Returns:
        The detached CPU copy of the parameter's data.
    """
    if not hasattr(param, "ds_id"):
        # This path never uses DeepSpeed, so it must not require the package
        # to be importable.
        return param.detach().cpu().clone()

    # Imported lazily: only parameters carrying ``ds_id`` need DeepSpeed.
    from deepspeed import zero
    from deepspeed.runtime.zero.partition_parameters import ZeroParamStatus

    if param.ds_status == ZeroParamStatus.NOT_AVAILABLE and not ignore_status:
        # The message states the condition that actually triggered it.
        logging.warning(
            "%s: param.ds_status == ZeroParamStatus.NOT_AVAILABLE: %s",
            name,
            param.ds_status,
        )
    with zero.GatheredParameters([param]):
        param = param.data.detach().cpu().clone()
    return param
|
|
|
|
|
|
|
|
def get_peft_state_maybe_zero_3(named_params, bias):
    """Select LoRA (and optionally bias) entries and copy them via ``maybe_zero_3``.

    Args:
        named_params: Iterable of ``(name, parameter)`` pairs.
        bias: Selection mode.
            ``"none"`` keeps entries whose name contains ``"lora_"``.
            ``"all"`` additionally keeps every entry whose name contains
            ``"bias"``.
            ``"lora_only"`` keeps the ``"lora_"`` entries plus only those
            bias entries whose name equals the text before ``"lora_"`` in
            some LoRA entry name, followed by ``"bias"``.

    Returns:
        Dict mapping each selected name to
        ``maybe_zero_3(param, ignore_status=True)``.

    Raises:
        NotImplementedError: If ``bias`` is none of the modes above.
    """
    if bias == "none":
        to_return = {k: t for k, t in named_params if "lora_" in k}
    elif bias == "all":
        to_return = {k: t for k, t in named_params if "lora_" in k or "bias" in k}
    elif bias == "lora_only":
        to_return = {}
        maybe_lora_bias = {}
        lora_bias_names = set()
        for k, t in named_params:
            if "lora_" in k:
                to_return[k] = t
                # Name the sibling bias of this LoRA entry would have.
                lora_bias_names.add(k.split("lora_")[0] + "bias")
            elif "bias" in k:
                maybe_lora_bias[k] = t
        # Iterate (name, tensor) pairs and match on the bias's own name;
        # iterating the dict directly yields only keys and cannot be
        # unpacked into two values.
        for k, t in maybe_lora_bias.items():
            if k in lora_bias_names:
                to_return[k] = t
    else:
        raise NotImplementedError
    to_return = {k: maybe_zero_3(v, ignore_status=True) for k, v in to_return.items()}
    return to_return
|
|
|
|
|
|
|
|
def get_peft_state_non_lora_maybe_zero_3(named_params, require_grad_only=True):
    """Copy every non-LoRA entry of ``named_params`` to CPU.

    Args:
        named_params: Iterable of ``(name, parameter)`` pairs.
        require_grad_only: When true, entries whose ``requires_grad`` is
            falsy are dropped before copying.

    Returns:
        Dict mapping each kept name to
        ``maybe_zero_3(param, ignore_status=True).cpu()``.
    """
    # Phase 1: discard anything whose name mentions "lora_".
    kept = {}
    for param_name, tensor in named_params:
        if "lora_" in param_name:
            continue
        kept[param_name] = tensor

    # Phase 2: optionally keep only entries that require gradients.
    if require_grad_only:
        kept = {
            param_name: tensor
            for param_name, tensor in kept.items()
            if tensor.requires_grad
        }

    # Phase 3: materialise each survivor on the CPU.
    copied = {}
    for param_name, tensor in kept.items():
        copied[param_name] = maybe_zero_3(tensor, ignore_status=True).cpu()
    return copied
|
|
|
|
|
def safe_save_model_for_hf_trainer(trainer: transformers.Trainer,
                                   output_dir: str):
    """Collect the model's state dict and write it to ``output_dir``.

    When ``trainer.deepspeed`` is truthy the work is handed to
    ``trainer.save_model`` after a CUDA synchronize. Otherwise the state
    dict is fetched and, if ``trainer.args.should_save`` is set, a CPU copy
    of it is written through ``trainer._save`` together with the model
    config.

    Args:
        trainer: Trainer whose model is saved.
        output_dir: Destination directory passed to the save calls.
    """
    if trainer.deepspeed:
        torch.cuda.synchronize()
        trainer.save_model(output_dir)
        return

    # The state dict is fetched before the should_save check, exactly as in
    # the original ordering.
    weights = trainer.model.state_dict()
    if not trainer.args.should_save:
        return

    cpu_weights = {}
    for tensor_name, tensor in weights.items():
        cpu_weights[tensor_name] = tensor.cpu()
    del weights
    trainer._save(output_dir, state_dict=cpu_weights)
    # NOTE(review): the source's indentation was lost; the config save is
    # assumed to belong to the should_save branch — confirm.
    trainer.model.config.save_pretrained(output_dir)