import os

import torch
from torch import cuda, nn
|
def print_gpu_memory(accelerator):
    """Print the allocated memory of every visible GPU from the main process."""
    if accelerator.is_local_main_process:
        for i in range(cuda.device_count()):
            # Query device i (the original queried device 0 on every iteration,
            # which misreported usage on multi-GPU setups).
            used_memory = cuda.memory_allocated(i) // 1024 ** 2
            print(f"GPU {i} Used Memory: {used_memory}MB")


def print_gpu_memory_device():
    """Print the allocated memory of the current CUDA device."""
    device = cuda.current_device()
    used_memory = cuda.memory_allocated(device) // 1024 ** 2
    print(f"GPU {device} Used Memory: {used_memory}MB")
def find_modules(module, layers=[nn.Linear], name='') -> dict:
    """
    Recursively find the layers of a certain type in a module.

    Args:
        module (nn.Module): PyTorch module to search.
        layers (list): List of layer types to find (defaults to nn.Linear;
            an empty default would make callers such as check_sparsity
            divide by zero).
        name (str): Qualified name of `module`, built up during recursion.

    Returns:
        dict: Dictionary mapping qualified names to layers of the given type(s).
    """
    if type(module) in layers:
        return {name: module}
    res = {}
    for child_name, child in module.named_children():
        res.update(find_modules(
            child, layers=layers, name=name + '.' + child_name if name != '' else child_name
        ))
    return res
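# Illustrative usage sketch: collecting all linear projections inside one
# decoder layer. The `q_proj`/`gate_proj` names follow the Llama convention
# and are assumptions about the model being inspected, not guarantees.
#
#     layer = model.model.layers[0]
#     linears = find_modules(layer, layers=[nn.Linear])
#     # -> {'self_attn.q_proj': Linear(...), ..., 'mlp.gate_proj': Linear(...)}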
def find_linears(module) -> dict:
    """Find all nn.Linear layers within `module`."""
    return find_modules(module, [nn.Linear])
@torch.no_grad()
def check_sparsity(model):
    """Print per-layer sparsity of the model's linear weights and return the overall ratio."""
    # Disable the KV cache while inspecting the model; restore it afterwards.
    use_cache = model.config.use_cache
    model.config.use_cache = False

    layers = model.model.layers
    count = 0
    total_params = 0
    for i in range(len(layers)):
        layer = layers[i]
        subset = find_modules(layer)

        sub_count = 0
        sub_params = 0
        for name in subset:
            W = subset[name].weight.data
            count += (W == 0).sum().item()
            total_params += W.numel()

            sub_count += (W == 0).sum().item()
            sub_params += W.numel()

        print(f"layer {i} sparsity {float(sub_count) / sub_params:.6f}")

    model.config.use_cache = use_cache
    return float(count) / total_params
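# Illustrative usage sketch, assuming `model` is a Hugging Face causal LM that
# exposes `model.model.layers` (e.g. Llama/Mistral) and has been pruned:
#
#     overall = check_sparsity(model)   # prints per-layer ratios as a side effect
#     print(f"overall sparsity {overall:.6f}")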
@torch.no_grad()
def check_sparsity_from_state_dict(state_dict):
    """
    Calculate per-layer and overall sparsity directly from a state_dict.

    Assumes parameter names follow the Hugging Face convention
    "model.layers.<layer_id>.<...>".
    """
    # Group parameter names by decoder-layer index.
    layer_params = {}
    for name in sorted(state_dict.keys()):
        if "layers" in name:
            layer_id = int(name.split(".")[2])
            if layer_id not in layer_params:
                layer_params[layer_id] = [name]
            else:
                layer_params[layer_id].append(name)
    layer_num = max(layer_params.keys()) + 1

    count = 0
    total_params = 0
    for i in range(layer_num):
        sub_count = 0
        sub_params = 0
        for name in layer_params[i]:
            count += (state_dict[name] == 0).sum().item()
            total_params += state_dict[name].numel()

            sub_count += (state_dict[name] == 0).sum().item()
            sub_params += state_dict[name].numel()

        print(f"layer {i} sparsity {float(sub_count) / sub_params:.6f}")

    return float(count) / total_params
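# Illustrative usage sketch; the checkpoint filename is hypothetical:
#
#     state_dict = torch.load("pruned_model.bin", map_location="cpu")
#     overall = check_sparsity_from_state_dict(state_dict)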
@torch.no_grad()
def prepare_calibration_input(model, dataloader, num_samples=16):
    """Capture the inputs reaching the first decoder layer for up to `num_samples` batches."""
    layers = model.model.layers

    cache = {'inputs': [], 'attention_mask': [], 'position_ids': [], 'cache_position': []}

    class Catcher(nn.Module):
        """Wraps the first layer to record its inputs, then aborts the forward pass."""

        def __init__(self, module):
            super().__init__()
            self.module = module
            self.self_attn = None

        def forward(self, input, **kwargs):
            cache['inputs'].append(input)
            cache['attention_mask'].append(kwargs['attention_mask'])
            cache['position_ids'].append(kwargs['position_ids'])
            cache['cache_position'].append(kwargs.get('cache_position'))
            # Abort the forward pass: only the first layer's inputs are needed.
            raise ValueError

    layers[0] = Catcher(layers[0])
    for index, batch in enumerate(dataloader):
        if index >= num_samples:
            break
        try:
            model(**batch)
        except ValueError:
            # Raised by Catcher.forward once the inputs have been recorded.
            pass
    layers[0] = layers[0].module
    outputs = [None] * len(cache['inputs'])
    return cache['inputs'], outputs, cache['attention_mask'], cache['position_ids'], cache['cache_position']
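# Illustrative usage sketch: gathering calibration activations before
# layer-wise pruning. `model` and `dataloader` are assumed to exist, and each
# batch must already live on the model's device for the hooked forward to run.
#
#     inputs, outputs, attention_mask, position_ids, cache_position = \
#         prepare_calibration_input(model, dataloader, num_samples=16)
#     # inputs[j] is the hidden state entering layer 0 for calibration sample j;
#     # outputs is a same-length list of placeholders to fill layer by layer.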
# Custom AutoConfig/AutoModelForCausalLM entries for each supported model family.
auto_map = {
    "llama": {
        "AutoConfig": "configuration_dropped_llama.LlamaConfig",
        "AutoModelForCausalLM": "modeling_dropped_llama.LlamaForCausalLM"
    },
    "mistral": {
        "AutoConfig": "configuration_dropped_mistral.MistralConfig",
        "AutoModelForCausalLM": "modeling_dropped_mistral.MistralForCausalLM"
    },
    "deepseek": {
        "AutoConfig": "configuration_deepseek.DeepseekConfig",
        "AutoModelForCausalLM": "modeling_dropped_deepseek.DeepseekForCausalLM"
    },
    "gemma2": {
        "AutoConfig": "configuration_dropped_gemma2.Gemma2Config",
        "AutoModelForCausalLM": "modeling_dropped_gemma2.Gemma2ForCausalLM"
    },
    "baichuan": {
        "AutoConfig": "configuration_dropped_baichuan.BaichuanConfig",
        "AutoModelForCausalLM": "modeling_dropped_baichuan.BaichuanForCausalLM"
    }
}
# Paths to the bundled custom configuration and modeling files for each family.
_MODULE_DIR = os.path.dirname(__file__)

CUSTOM_FILE = {
    "llama": {
        "config": os.path.join(_MODULE_DIR, "models/configuration_dropped_llama.py"),
        "model": os.path.join(_MODULE_DIR, "models/modeling_dropped_llama.py")
    },
    "mistral": {
        "config": os.path.join(_MODULE_DIR, "models/configuration_dropped_mistral.py"),
        "model": os.path.join(_MODULE_DIR, "models/modeling_dropped_mistral.py")
    },
    "deepseek": {
        "config": os.path.join(_MODULE_DIR, "models/configuration_deepseek.py"),
        "model": os.path.join(_MODULE_DIR, "models/modeling_dropped_deepseek.py")
    },
    "gemma2": {
        "config": os.path.join(_MODULE_DIR, "models/configuration_dropped_gemma2.py"),
        "model": os.path.join(_MODULE_DIR, "models/modeling_dropped_gemma2.py")
    },
    "baichuan": {
        "config": os.path.join(_MODULE_DIR, "models/configuration_dropped_baichuan.py"),
        "model": os.path.join(_MODULE_DIR, "models/modeling_dropped_baichuan.py")
    }
}
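# Illustrative usage sketch: wiring the custom classes into a saved checkpoint
# so it can be reloaded with `trust_remote_code=True`. Copying the files with
# `shutil` is an assumption about how CUSTOM_FILE is consumed downstream.
#
#     import shutil
#
#     model.config.auto_map = auto_map["llama"]
#     model.save_pretrained(save_dir)
#     shutil.copy(CUSTOM_FILE["llama"]["config"], save_dir)
#     shutil.copy(CUSTOM_FILE["llama"]["model"], save_dir)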