import os
import torch
from torch import cuda, nn
def print_gpu_memory(accelerator):
if accelerator.is_local_main_process: # πŸ”
for i in range(cuda.device_count()):
            used_memory = cuda.memory_allocated(i) // 1024 ** 2  # query device i, not always device 0
print(f"GPU {i} Used Memory: {used_memory}MB")
def print_gpu_memory_device():
device = cuda.current_device()
used_memory = cuda.memory_allocated(device) // 1024 ** 2
print(f"GPU {device} Used Memory: {used_memory}MB")
def find_modules(module, layers=[], name='') -> dict:
"""
Recursively find the layers of a certain type in a module.
Args:
module (nn.Module): PyTorch module.
layers (list): List of layer types to find.
name (str): Name of the module.
Returns:
dict: Dictionary of layers of the given type(s) within the module.
"""
if type(module) in layers:
return {name: module}
res = {}
for name1, child in module.named_children():
res.update(find_modules(
child, layers=layers, name=name + '.' + name1 if name != '' else name1
))
return res
def find_linears(module) -> dict:
# πŸ” find only the expert weights
res = find_modules(module, [nn.Linear])
return res
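# Illustrative sketch (not part of the original utilities): find_modules / find_linears
# return nn.Linear sub-modules keyed by their dotted path. The toy model below is
# hypothetical and only exists to show the expected output shape.
def _example_find_linears():
    toy = nn.Sequential(nn.Linear(8, 16), nn.ReLU(), nn.Linear(16, 4))
    linears = find_linears(toy)
    # keys are qualified names, e.g. {'0': Linear(8, 16), '2': Linear(16, 4)}
    for qualified_name, linear in linears.items():
        print(qualified_name, tuple(linear.weight.shape))
    return linears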
@torch.no_grad()
def check_sparsity(model):
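    """Print the fraction of exactly-zero weights in each decoder layer's nn.Linear
    modules and return the overall zero fraction across all layers."""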
use_cache = model.config.use_cache
model.config.use_cache = False
layers = model.model.layers
count = 0
total_params = 0
for i in range(len(layers)):
layer = layers[i]
        subset = find_linears(layer)  # 🔍 restrict to nn.Linear weights (find_modules(layer) with the empty default would return nothing)
sub_count = 0
sub_params = 0
for name in subset:
W = subset[name].weight.data
count += (W == 0).sum().item()
total_params += W.numel()
sub_count += (W == 0).sum().item()
sub_params += W.numel()
print(f"layer {i} sparsity {float(sub_count) / sub_params:.6f}")
model.config.use_cache = use_cache
return float(count) / total_params
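# Hedged usage sketch (an assumption, not code from this file): check_sparsity expects a
# Hugging Face-style causal LM exposing model.config.use_cache and model.model.layers.
# The checkpoint path is a placeholder.
def _example_check_sparsity(checkpoint="path/to/pruned-model"):
    from transformers import AutoModelForCausalLM  # assumed dependency
    model = AutoModelForCausalLM.from_pretrained(checkpoint, torch_dtype=torch.float16)
    overall = check_sparsity(model)
    print(f"overall sparsity {overall:.6f}")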
@torch.no_grad()
def check_sparsity_from_state_dict(state_dict):
"""
πŸ” This function has been rewritten to calculate sparsity from "state_dict".
"""
# Get corresponding names for each layer
layer_params = {}
for name in sorted(list(state_dict.keys())):
if "layers" in name:
layer_id = int(name.split(".")[2])
if layer_id not in layer_params:
layer_params[layer_id] = [name]
else:
layer_params[layer_id].append(name)
layer_num = max(list(layer_params.keys())) + 1
# Calculate sparsity
count = 0
total_params = 0
for i in range(layer_num):
sub_count = 0
sub_params = 0
for name in layer_params[i]:
count += (state_dict[name] == 0).sum().item()
total_params += state_dict[name].numel()
sub_count += (state_dict[name] == 0).sum().item()
sub_params += state_dict[name].numel()
print(f"layer {i} sparsity {float(sub_count) / sub_params:.6f}")
return float(count) / total_params
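# Hedged usage sketch: per the docstring above, this variant only needs a state_dict, so it
# can inspect a saved checkpoint without building the model. The file name is a placeholder.
def _example_check_sparsity_from_state_dict(path="path/to/pytorch_model.bin"):
    state_dict = torch.load(path, map_location="cpu")
    print(f"overall sparsity {check_sparsity_from_state_dict(state_dict):.6f}")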
@torch.no_grad()
def prepare_calibration_input(model, dataloader, num_samples=16):
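    """Capture the hidden states and forward kwargs (attention_mask, position_ids,
    cache_position) that reach the first decoder layer for up to `num_samples`
    calibration batches, by temporarily wrapping layer 0 in a Catcher that aborts
    each forward pass. The dataloader is expected to yield batches of size 1."""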
layers = model.model.layers
    cache = {'inputs': [], 'attention_mask': [], "position_ids": [], "cache_position": []}
class Catcher(nn.Module):
def __init__(self, module):
super().__init__()
self.module = module
self.self_attn = None
def forward(self, input, **kwargs):
# print(input.shape)
cache['inputs'].append(input)
cache['attention_mask'].append(kwargs['attention_mask'])
cache['position_ids'].append(kwargs['position_ids'])
cache['cache_position'].append(kwargs['cache_position'] if 'cache_position' in kwargs else None)
raise ValueError
layers[0] = Catcher(layers[0])
for index, batch in enumerate(dataloader):
if index >= num_samples: # πŸ” limit the number of samples in each device, batch_size must be 1
break
try:
model(**batch)
except ValueError:
pass
layers[0] = layers[0].module
outputs = [None] * len(cache['inputs'])
return cache['inputs'], outputs, cache['attention_mask'], cache['position_ids'], cache['cache_position']
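# Hedged usage sketch: each batch must be a dict of tensors the model accepts (e.g. produced
# by a tokenizer) with batch size 1, as noted above. Building the dataloader is out of scope
# here and left as an assumption.
def _example_prepare_calibration_input(model, dataloader):
    inputs, outputs, attention_mask, position_ids, cache_position = prepare_calibration_input(
        model, dataloader, num_samples=16
    )
    # inputs[j] holds the hidden states entering layer 0 for calibration sample j;
    # outputs is a same-length list of placeholders to be filled layer by layer.
    print(len(inputs), len(outputs))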
auto_map = {
"llama": {
"AutoConfig": "configuration_dropped_llama.LlamaConfig",
"AutoModelForCausalLM": "modeling_dropped_llama.LlamaForCausalLM"
},
"mistral": {
"AutoConfig": "configuration_dropped_mistral.MistralConfig",
"AutoModelForCausalLM": "modeling_dropped_mistral.MistralForCausalLM"
},
"deepseek":
{
"AutoConfig": "configuration_deepseek.DeepseekConfig",
"AutoModelForCausalLM": "modeling_dropped_deepseek.DeepseekForCausalLM"
},
"gemma2":
{
"AutoConfig": "configuration_dropped_gemma2.Gemma2Config",
"AutoModelForCausalLM": "modeling_dropped_gemma2.Gemma2ForCausalLM"
},
"baichuan":
{
"AutoConfig": "configuration_dropped_baichuan.BaichuanConfig",
"AutoModelForCausalLM": "modeling_dropped_baichuan.BaichuanForCausalLM"
}
}
CUSTOM_FILE = {
"llama": {
"config": os.path.join(os.path.dirname(__file__), "models/configuration_dropped_llama.py"),
"model": os.path.join(os.path.dirname(__file__), "models/modeling_dropped_llama.py")
},
"mistral": {
"config": os.path.join(os.path.dirname(__file__), "models/configuration_dropped_mistral.py"),
"model": os.path.join(os.path.dirname(__file__), "models/modeling_dropped_mistral.py")
},
"deepseek": {
"config": os.path.join(os.path.dirname(__file__), "models/configuration_deepseek.py"),
"model": os.path.join(os.path.dirname(__file__), "models/modeling_dropped_deepseek.py")
},
"gemma2": {
"config": os.path.join(os.path.dirname(__file__), "models/configuration_dropped_gemma2.py"),
"model": os.path.join(os.path.dirname(__file__), "models/modeling_dropped_gemma2.py")
},
"baichuan": {
"config": os.path.join(os.path.dirname(__file__), "models/configuration_dropped_baichuan.py"),
"model": os.path.join(os.path.dirname(__file__), "models/modeling_dropped_baichuan.py")
}
}
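# Hedged sketch of how auto_map / CUSTOM_FILE might be consumed (an assumption about the
# surrounding pipeline, not code from this file): copy the custom configuration/modeling
# files next to a saved checkpoint and record auto_map in the config so the checkpoint can
# be reloaded with trust_remote_code=True.
def _example_export_custom_code(model, model_type="llama", save_dir="path/to/output"):
    import shutil
    os.makedirs(save_dir, exist_ok=True)
    model.config.auto_map = auto_map[model_type]
    model.save_pretrained(save_dir)
    for src in CUSTOM_FILE[model_type].values():
        shutil.copy(src, save_dir)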