import os
import torch
from torch import cuda, nn
def print_gpu_memory(accelerator):
if accelerator.is_local_main_process: # πŸ”
for i in range(cuda.device_count()):
            used_memory = cuda.memory_allocated(i) // 1024 ** 2  # query device i, not always device 0
print(f"GPU {i} Used Memory: {used_memory}MB")
def print_gpu_memory_device():
device = cuda.current_device()
used_memory = cuda.memory_allocated(device) // 1024 ** 2
print(f"GPU {device} Used Memory: {used_memory}MB")
def find_modules(module, layers=[], name='') -> dict:
"""
Recursively find the layers of a certain type in a module.
Args:
module (nn.Module): PyTorch module.
layers (list): List of layer types to find.
name (str): Name of the module.
Returns:
dict: Dictionary of layers of the given type(s) within the module.
"""
if type(module) in layers:
return {name: module}
res = {}
for name1, child in module.named_children():
res.update(find_modules(
child, layers=layers, name=name + '.' + name1 if name != '' else name1
))
return res
def find_linears(module) -> dict:
# πŸ” find only the expert weights
res = find_modules(module, [nn.Linear])
return res
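# Illustrative sketch (not part of the original utilities): find_modules / find_linears
# return nn.Linear sub-modules keyed by their dotted path. The toy model below is
# hypothetical and only exists to show the expected output shape.
def _example_find_linears():
    toy = nn.Sequential(nn.Linear(8, 16), nn.ReLU(), nn.Linear(16, 4))
    linears = find_linears(toy)
    # keys are qualified names, e.g. {'0': Linear(8, 16), '2': Linear(16, 4)}
    for qualified_name, linear in linears.items():
        print(qualified_name, tuple(linear.weight.shape))
    return linears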
@torch.no_grad()
def check_sparsity(model):
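    """Print the fraction of exactly-zero weights in each decoder layer's nn.Linear
    modules and return the overall zero fraction across all layers."""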
use_cache = model.config.use_cache
model.config.use_cache = False
layers = model.model.layers
count = 0
total_params = 0
for i in range(len(layers)):
layer = layers[i]
        subset = find_linears(layer)  # 🔍 restrict to nn.Linear weights (find_modules(layer) with the empty default would return nothing)
sub_count = 0
sub_params = 0
for name in subset:
W = subset[name].weight.data
count += (W == 0).sum().item()
total_params += W.numel()
sub_count += (W == 0).sum().item()
sub_params += W.numel()
print(f"layer {i} sparsity {float(sub_count) / sub_params:.6f}")
model.config.use_cache = use_cache
return float(count) / total_params
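# Hedged usage sketch (an assumption, not code from this file): check_sparsity expects a
# Hugging Face-style causal LM exposing model.config.use_cache and model.model.layers.
# The checkpoint path is a placeholder.
def _example_check_sparsity(checkpoint="path/to/pruned-model"):
    from transformers import AutoModelForCausalLM  # assumed dependency
    model = AutoModelForCausalLM.from_pretrained(checkpoint, torch_dtype=torch.float16)
    overall = check_sparsity(model)
    print(f"overall sparsity {overall:.6f}")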
@torch.no_grad()
def check_sparsity_from_state_dict(state_dict):
"""
πŸ” This function has been rewritten to calculate sparsity from "state_dict".
"""
# Get corresponding names for each layer
layer_params = {}
for name in sorted(list(state_dict.keys())):
if "layers" in name:
layer_id = int(name.split(".")[2])
if layer_id not in layer_params:
layer_params[layer_id] = [name]
else:
layer_params[layer_id].append(name)
layer_num = max(list(layer_params.keys())) + 1
# Calculate sparsity
count = 0
total_params = 0
for i in range(layer_num):
sub_count = 0
sub_params = 0
for name in layer_params[i]:
count += (state_dict[name] == 0).sum().item()
total_params += state_dict[name].numel()
sub_count += (state_dict[name] == 0).sum().item()
sub_params += state_dict[name].numel()
print(f"layer {i} sparsity {float(sub_count) / sub_params:.6f}")
return float(count) / total_params
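# Hedged usage sketch: per the docstring above, this variant only needs a state_dict, so it
# can inspect a saved checkpoint without building the model. The file name is a placeholder.
def _example_check_sparsity_from_state_dict(path="path/to/pytorch_model.bin"):
    state_dict = torch.load(path, map_location="cpu")
    print(f"overall sparsity {check_sparsity_from_state_dict(state_dict):.6f}")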
@torch.no_grad()
def prepare_calibration_input(model, dataloader, num_samples=16):
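    """Capture the hidden states and forward kwargs (attention_mask, position_ids,
    cache_position) that reach the first decoder layer for up to `num_samples`
    calibration batches, by temporarily wrapping layer 0 in a Catcher that aborts
    each forward pass. The dataloader is expected to yield batches of size 1."""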
layers = model.model.layers
    cache = {'inputs': [], 'attention_mask': [], "position_ids": [], "cache_position": []}
class Catcher(nn.Module):
def __init__(self, module):
super().__init__()
self.module = module
self.self_attn = None
def forward(self, input, **kwargs):
# print(input.shape)
cache['inputs'].append(input)
cache['attention_mask'].append(kwargs['attention_mask'])
cache['position_ids'].append(kwargs['position_ids'])
cache['cache_position'].append(kwargs['cache_position'] if 'cache_position' in kwargs else None)
raise ValueError
layers[0] = Catcher(layers[0])
for index, batch in enumerate(dataloader):
if index >= num_samples: # πŸ” limit the number of samples in each device, batch_size must be 1
break
try:
model(**batch)
except ValueError:
pass
layers[0] = layers[0].module
outputs = [None] * len(cache['inputs'])
return cache['inputs'], outputs, cache['attention_mask'], cache['position_ids'], cache['cache_position']
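# Hedged usage sketch: each batch must be a dict of tensors the model accepts (e.g. produced
# by a tokenizer) with batch size 1, as noted above. Building the dataloader is out of scope
# here and left as an assumption.
def _example_prepare_calibration_input(model, dataloader):
    inputs, outputs, attention_mask, position_ids, cache_position = prepare_calibration_input(
        model, dataloader, num_samples=16
    )
    # inputs[j] holds the hidden states entering layer 0 for calibration sample j;
    # outputs is a same-length list of placeholders to be filled layer by layer.
    print(len(inputs), len(outputs))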
auto_map = {
"llama": {
"AutoConfig": "configuration_dropped_llama.LlamaConfig",
"AutoModelForCausalLM": "modeling_dropped_llama.LlamaForCausalLM"
},
"mistral": {
"AutoConfig": "configuration_dropped_mistral.MistralConfig",
"AutoModelForCausalLM": "modeling_dropped_mistral.MistralForCausalLM"
},
"deepseek":
{
"AutoConfig": "configuration_deepseek.DeepseekConfig",
"AutoModelForCausalLM": "modeling_dropped_deepseek.DeepseekForCausalLM"
},
"gemma2":
{
"AutoConfig": "configuration_dropped_gemma2.Gemma2Config",
"AutoModelForCausalLM": "modeling_dropped_gemma2.Gemma2ForCausalLM"
},
"baichuan":
{
"AutoConfig": "configuration_dropped_baichuan.BaichuanConfig",
"AutoModelForCausalLM": "modeling_dropped_baichuan.BaichuanForCausalLM"
}
}
CUSTOM_FILE = {
"llama": {
"config": os.path.join(os.path.dirname(__file__), "models/configuration_dropped_llama.py"),
"model": os.path.join(os.path.dirname(__file__), "models/modeling_dropped_llama.py")
},
"mistral": {
"config": os.path.join(os.path.dirname(__file__), "models/configuration_dropped_mistral.py"),
"model": os.path.join(os.path.dirname(__file__), "models/modeling_dropped_mistral.py")
},
"deepseek": {
"config": os.path.join(os.path.dirname(__file__), "models/configuration_deepseek.py"),
"model": os.path.join(os.path.dirname(__file__), "models/modeling_dropped_deepseek.py")
},
"gemma2": {
"config": os.path.join(os.path.dirname(__file__), "models/configuration_dropped_gemma2.py"),
"model": os.path.join(os.path.dirname(__file__), "models/modeling_dropped_gemma2.py")
},
"baichuan": {
"config": os.path.join(os.path.dirname(__file__), "models/configuration_dropped_baichuan.py"),
"model": os.path.join(os.path.dirname(__file__), "models/modeling_dropped_baichuan.py")
}
}
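# Hedged sketch of how auto_map / CUSTOM_FILE might be consumed (an assumption about the
# surrounding pipeline, not code from this file): copy the custom configuration/modeling
# files next to a saved checkpoint and record auto_map in the config so the checkpoint can
# be reloaded with trust_remote_code=True.
def _example_export_custom_code(model, model_type="llama", save_dir="path/to/output"):
    import shutil
    os.makedirs(save_dir, exist_ok=True)
    model.config.auto_map = auto_map[model_type]
    model.save_pretrained(save_dir)
    for src in CUSTOM_FILE[model_type].values():
        shutil.copy(src, save_dir)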