import argparse
import os

import torch
import torch.nn as nn
from transformers import (
    AutoModelForCausalLM,
    AutoModelForSeq2SeqLM,
    AutoModelForSequenceClassification,
    AutoTokenizer,
)

from peft import LoftQConfig, LoraConfig, TaskType, get_peft_model

class Shell(nn.Module):
    # A bare container that holds frozen weight/bias tensors, mimicking a plain
    # layer so the quantized backbone can be saved without its LoRA wrappers.
    def __init__(self, weight, bias=None):
        super().__init__()
        self.weight = nn.Parameter(weight, requires_grad=False)
        if bias is not None:
            self.bias = nn.Parameter(bias, requires_grad=False)

def unwrap_model(model, sub_module_name=".base_layer"):
    sub_module_name_list = [k.split(sub_module_name)[0] for k in model.state_dict().keys() if sub_module_name in k]
    sub_module_name_set = set(sub_module_name_list)
    for name in sub_module_name_set:
        # get the parent of the LoRA-wrapped submodule
        name_parent = ".".join(name.split(".")[:-1])
        name_child = name.split(".")[-1]
        sub_module = model.get_submodule(name_parent)
        print(sub_module)

        # replace the wrapped layer with a Shell holding only the base weights
        child = getattr(sub_module, name_child)
        weight = getattr(child.base_layer, "weight", None)
        bias = getattr(child.base_layer, "bias", None)
        shell = Shell(weight, bias)

        setattr(sub_module, name_child, shell)

    print("You have unwrapped the model. Use it at your own risk.")

def print_model(model, name):
    print("=" * 10 + name + "=" * 10)
    print(model)
    for param_name, param in model.named_parameters():
        if torch.is_tensor(param):
            if param.dtype in [torch.float32, torch.float16]:
                print(
                    param_name,
                    param.shape,
                    param.device,
                    param.dtype,
                    param.requires_grad,
                    param.mean().item(),
                    param.max().item(),
                )
            else:
                print(param_name, param.shape, param.device, param.dtype, param.requires_grad)

def arg_parse():
    parser = argparse.ArgumentParser(description="Quantize a model with LoftQ.")
    parser.add_argument(
        "--model_name_or_path",
        type=str,
        default=None,
        required=True,
        help="The name or path of the fp32/16 model.",
    )
    parser.add_argument(
        "--token",
        type=str,
        default=None,
        help="The access token to download model from HuggingFace Hub.",
    )
    parser.add_argument(
        "--bits",
        type=int,
        default=4,
        help="The number of bits to quantize the backbone to.",
    )
    parser.add_argument(
        "--iter",
        type=int,
        default=1,
        help="The number of alternating optimization steps in LoftQ.",
    )
    parser.add_argument(
        "--rank",
        type=int,
        default=16,
        help="The rank of the LoRA adapter.",
    )
    parser.add_argument(
        "--save_dir",
        type=str,
        default="./model_zoo/loftq/",
        help="The directory to save the quantized backbone and LoRA adapters.",
    )
    args = parser.parse_args()
    return args

def quantize_and_save():
    args = arg_parse()

    # Download weights and configure LoRA targets per model family
    tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path, token=args.token, trust_remote_code=True)
    if any(name in args.model_name_or_path.lower() for name in ["llama", "mistral", "falcon"]):
        model = AutoModelForCausalLM.from_pretrained(args.model_name_or_path, token=args.token, trust_remote_code=True)
        task_type = TaskType.CAUSAL_LM
        target_modules = ["q_proj", "k_proj", "v_proj", "o_proj", "up_proj", "down_proj", "gate_proj"]

    elif any(name in args.model_name_or_path.lower() for name in ["bart", "t5"]):
        model = AutoModelForSeq2SeqLM.from_pretrained(args.model_name_or_path, token=args.token)
        task_type = TaskType.SEQ_2_SEQ_LM
        target_modules = ["q_proj", "k_proj", "v_proj", "fc1", "fc2", "out_proj"]

    elif any(name in args.model_name_or_path.lower() for name in ["deberta", "roberta", "bert"]):
        model = AutoModelForSequenceClassification.from_pretrained(args.model_name_or_path, token=args.token)
        task_type = TaskType.SEQ_CLS
        target_modules = ["query_proj", "key_proj", "value_proj", "dense"]
    else:
        raise NotImplementedError("Other models not supported yet.")

    # Configure LoftQ quantization
    loftq_config = LoftQConfig(loftq_bits=args.bits, loftq_iter=args.iter)

    lora_config = LoraConfig(
        task_type=task_type,
        inference_mode=True,
        r=args.rank,
        lora_alpha=16 if task_type is TaskType.CAUSAL_LM else args.rank,
        lora_dropout=0.1,
        target_modules=target_modules,
        init_lora_weights="loftq",
        loftq_config=loftq_config,
    )

    # Obtain the LoftQ-initialized model
    lora_model = get_peft_model(model, lora_config)
    base_model = lora_model.get_base_model()

    # Where to save the quantized backbone and the LoRA adapters
    model_name = args.model_name_or_path.split("/")[-1] + f"-{args.bits}bit" + f"-{args.rank}rank"
    base_model_dir = os.path.join(args.save_dir, model_name)
    lora_model_dir = os.path.join(args.save_dir, model_name, "loft_init")

    # Save the LoRA adapters first: point the adapter config at the local
    # backbone directory and disable LoftQ re-initialization on future loads
    lora_model.base_model.peft_config["default"].base_model_name_or_path = base_model_dir
    lora_model.base_model.peft_config["default"].init_lora_weights = True

    lora_model.save_pretrained(lora_model_dir)
    print_model(lora_model, "lora_model")

    # Remove the LoRA adapters and save the quantized backbone
    unwrap_model(base_model)
    base_model.save_pretrained(base_model_dir)
    tokenizer.save_pretrained(base_model_dir)

    print_model(base_model, "base_model")

    return base_model_dir, lora_model_dir


if __name__ == "__main__":
    base_dir, lora_dir = quantize_and_save()

# example command (flags match arg_parse above; XXX is a placeholder token):
# python quantize_save_load.py \
#     --model_name_or_path meta-llama/Llama-2-7b-hf \
#     --token XXX \
#     --bits 4 --iter 5 --rank 16 \
#     --save_dir ./model_zoo/loftq/
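
# A minimal sketch (not part of the original script) of how the saved artifacts
# might be loaded back for fine-tuning. It assumes a causal LM, 4-bit NF4
# quantization via bitsandbytes with a bfloat16 compute dtype, and the
# directory layout produced by quantize_and_save() above; the function name is
# hypothetical.
def load_loftq_example(base_model_dir, lora_model_dir):
    from transformers import BitsAndBytesConfig

    from peft import PeftModel

    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",  # assumed quant type; should match the LoftQ bits used above
        bnb_4bit_compute_dtype=torch.bfloat16,  # assumed compute dtype; adjust to your hardware
    )
    # Load the quantized backbone saved by this script
    base_model = AutoModelForCausalLM.from_pretrained(base_model_dir, quantization_config=bnb_config)
    # Attach the LoftQ-initialized LoRA adapters; is_trainable=True re-enables
    # gradient updates on the adapter weights
    peft_model = PeftModel.from_pretrained(base_model, lora_model_dir, is_trainable=True)
    return peft_model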