import argparse
import os

import torch
import torch.nn as nn
from transformers import (
    AutoModelForCausalLM,
    AutoModelForSeq2SeqLM,
    AutoModelForSequenceClassification,
    AutoTokenizer,
)

from peft import LoftQConfig, LoraConfig, TaskType, get_peft_model

class Shell(nn.Module):
    # A bare container that holds frozen weight/bias tensors, mimicking a plain
    # layer so the quantized backbone can be saved without its LoRA wrappers.
    def __init__(self, weight, bias=None):
        super().__init__()
        self.weight = nn.Parameter(weight, requires_grad=False)
        if bias is not None:
            self.bias = nn.Parameter(bias, requires_grad=False)

def unwrap_model(model, sub_module_name=".base_layer"):
    sub_module_name_list = [k.split(sub_module_name)[0] for k in model.state_dict().keys() if sub_module_name in k]
    sub_module_name_set = set(sub_module_name_list)
    for name in sub_module_name_set:
        # get the parent of the LoRA-wrapped submodule
        name_parent = ".".join(name.split(".")[:-1])
        name_child = name.split(".")[-1]
        sub_module = model.get_submodule(name_parent)
        print(sub_module)

        # replace the wrapped layer with a Shell holding only the base weights
        child = getattr(sub_module, name_child)
        weight = getattr(child.base_layer, "weight", None)
        bias = getattr(child.base_layer, "bias", None)
        shell = Shell(weight, bias)

        setattr(sub_module, name_child, shell)

    print("You have unwrapped the model. Use it at your own risk.")

def print_model(model, name):
    print("=" * 10 + name + "=" * 10)
    print(model)
    for param_name, param in model.named_parameters():
        if torch.is_tensor(param):
            if param.dtype in [torch.float32, torch.float16]:
                print(
                    param_name,
                    param.shape,
                    param.device,
                    param.dtype,
                    param.requires_grad,
                    param.mean().item(),
                    param.max().item(),
                )
            else:
                print(param_name, param.shape, param.device, param.dtype, param.requires_grad)

def arg_parse():
    parser = argparse.ArgumentParser(description="Quantize a model with LoftQ.")
    parser.add_argument(
        "--model_name_or_path",
        type=str,
        default=None,
        required=True,
        help="The name or path of the fp32/16 model.",
    )
    parser.add_argument(
        "--token",
        type=str,
        default=None,
        help="The access token to download model from HuggingFace Hub.",
    )
    parser.add_argument(
        "--bits",
        type=int,
        default=4,
        help="The number of bits to quantize the backbone to.",
    )
    parser.add_argument(
        "--iter",
        type=int,
        default=1,
        help="The number of alternating optimization steps in LoftQ.",
    )
    parser.add_argument(
        "--rank",
        type=int,
        default=16,
        help="The rank of the LoRA adapter.",
    )
    parser.add_argument(
        "--save_dir",
        type=str,
        default="./model_zoo/loftq/",
        help="The directory to save the quantized backbone and LoRA adapters.",
    )
    args = parser.parse_args()
    return args

def quantize_and_save():
    args = arg_parse()

    # Download weights and configure LoRA targets per model family
    tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path, token=args.token, trust_remote_code=True)
    if any(name in args.model_name_or_path.lower() for name in ["llama", "mistral", "falcon"]):
        model = AutoModelForCausalLM.from_pretrained(args.model_name_or_path, token=args.token, trust_remote_code=True)
        task_type = TaskType.CAUSAL_LM
        target_modules = ["q_proj", "k_proj", "v_proj", "o_proj", "up_proj", "down_proj", "gate_proj"]

    elif any(name in args.model_name_or_path.lower() for name in ["bart", "t5"]):
        model = AutoModelForSeq2SeqLM.from_pretrained(args.model_name_or_path, token=args.token)
        task_type = TaskType.SEQ_2_SEQ_LM
        target_modules = ["q_proj", "k_proj", "v_proj", "fc1", "fc2", "out_proj"]

    elif any(name in args.model_name_or_path.lower() for name in ["deberta", "roberta", "bert"]):
        model = AutoModelForSequenceClassification.from_pretrained(args.model_name_or_path, token=args.token)
        task_type = TaskType.SEQ_CLS
        target_modules = ["query_proj", "key_proj", "value_proj", "dense"]
    else:
        raise NotImplementedError("Other models not supported yet.")

    # Configure LoftQ quantization
    loftq_config = LoftQConfig(loftq_bits=args.bits, loftq_iter=args.iter)

    lora_config = LoraConfig(
        task_type=task_type,
        inference_mode=True,
        r=args.rank,
        lora_alpha=16 if task_type is TaskType.CAUSAL_LM else args.rank,
        lora_dropout=0.1,
        target_modules=target_modules,
        init_lora_weights="loftq",
        loftq_config=loftq_config,
    )

    # Obtain the LoftQ-initialized model
    lora_model = get_peft_model(model, lora_config)
    base_model = lora_model.get_base_model()

    # Where to save the quantized backbone and the LoRA adapters
    model_name = args.model_name_or_path.split("/")[-1] + f"-{args.bits}bit" + f"-{args.rank}rank"
    base_model_dir = os.path.join(args.save_dir, model_name)
    lora_model_dir = os.path.join(args.save_dir, model_name, "loft_init")

    # Save the LoRA adapters first: point the adapter config at the local
    # backbone directory and disable LoftQ re-initialization on future loads
    lora_model.base_model.peft_config["default"].base_model_name_or_path = base_model_dir
    lora_model.base_model.peft_config["default"].init_lora_weights = True

    lora_model.save_pretrained(lora_model_dir)
    print_model(lora_model, "lora_model")

    # Remove the LoRA adapters and save the quantized backbone
    unwrap_model(base_model)
    base_model.save_pretrained(base_model_dir)
    tokenizer.save_pretrained(base_model_dir)

    print_model(base_model, "base_model")

    return base_model_dir, lora_model_dir


if __name__ == "__main__":
    base_dir, lora_dir = quantize_and_save()

# example command (flags match arg_parse above; XXX is a placeholder token):
# python quantize_save_load.py \
#     --model_name_or_path meta-llama/Llama-2-7b-hf \
#     --token XXX \
#     --bits 4 --iter 5 --rank 16 \
#     --save_dir ./model_zoo/loftq/
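
# A minimal sketch (not part of the original script) of how the saved artifacts
# might be loaded back for fine-tuning. It assumes a causal LM, 4-bit NF4
# quantization via bitsandbytes with a bfloat16 compute dtype, and the
# directory layout produced by quantize_and_save() above; the function name is
# hypothetical.
def load_loftq_example(base_model_dir, lora_model_dir):
    from transformers import BitsAndBytesConfig

    from peft import PeftModel

    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",  # assumed quant type; should match the LoftQ bits used above
        bnb_4bit_compute_dtype=torch.bfloat16,  # assumed compute dtype; adjust to your hardware
    )
    # Load the quantized backbone saved by this script
    base_model = AutoModelForCausalLM.from_pretrained(base_model_dir, quantization_config=bnb_config)
    # Attach the LoftQ-initialized LoRA adapters; is_trainable=True re-enables
    # gradient updates on the adapter weights
    peft_model = PeftModel.from_pretrained(base_model, lora_model_dir, is_trainable=True)
    return peft_model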