# Susav's picture
# Upload folder using huggingface_hub
# b3a3b15 verified
import os
import yaml
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Model identifiers in a fixed order. The number after each entry is its
# 1-based position in the list.
MODELS = [
    # --- OPT checkpoints ---
    "facebook/opt-125m",                # 1
    "facebook/opt-350m",                # 2
    "facebook/opt-1.3b",                # 3
    "facebook/opt-2.7b",                # 4
    "facebook/opt-6.7b",                # 5
    "facebook/opt-13b",                 # 6
    "facebook/opt-30b",                 # 7
    "facebook/opt-66b",                 # 8
    # --- LLaMA-derived sparse checkpoints ---
    "SparseLLM/ReluLLaMA-7B",           # 9
    "SparseLLM/prosparse-llama-2-7b",   # 10
    # --- Llama 2 ---
    "meta-llama/Llama-2-7b-hf",         # 11
    "meta-llama/Llama-2-13b-hf",        # 12
    # Position 13 holds a placeholder string rather than a model id.
    "-",                                # 13
    # --- Llama 3.1 ---
    "meta-llama/Llama-3.1-8B",          # 14
    "meta-llama/Llama-3.1-70B",         # 15
]
# Per-model architecture table, keyed by model identifier.
#
# Every entry carries 'num_layer', 'd', 'h' and 'neurons'; some also carry
# 'layer_imp', a path to a JSON file under results/attn_importance/.
# NOTE(review): 'd' / 'h' / 'neurons' presumably mean hidden size, attention
# head count and MLP intermediate width — confirm against the callers.
# For every OPT entry below, 'neurons' equals 4 * 'd'.
CONFIGS = {
    # ---------------- OPT ----------------
    # NOTE(review): the table records 95 layers for opt-175b — confirm this
    # is the intended value for the checkpoint in use.
    "facebook/opt-175b": {"num_layer": 95, "d": 12288, "h": 96, "neurons": 49152},
    "facebook/opt-66b": {
        "num_layer": 64,
        "d": 9216,
        "h": 72,
        "neurons": 36864,
        "layer_imp": "results/attn_importance/opt-66b_attn_importance.json",
    },
    "facebook/opt-30b": {
        "num_layer": 48,
        "d": 7168,
        "h": 56,
        "neurons": 28672,
        "layer_imp": "results/attn_importance/opt-30b_attn_importance.json",
    },
    "facebook/opt-13b": {
        "num_layer": 40,
        "d": 5120,
        "h": 40,
        "neurons": 20480,
        "layer_imp": "results/attn_importance/opt-13b_attn_importance.json",
    },
    "facebook/opt-6.7b": {
        "num_layer": 32,
        "d": 4096,
        "h": 32,
        "neurons": 16384,
        "layer_imp": "results/attn_importance/opt-6.7b_attn_importance.json",
    },
    "facebook/opt-2.7b": {"num_layer": 32, "d": 2560, "h": 32, "neurons": 10240},
    "facebook/opt-1.3b": {"num_layer": 24, "d": 2048, "h": 32, "neurons": 8192},
    "facebook/opt-350m": {"num_layer": 24, "d": 1024, "h": 16, "neurons": 4096},
    "facebook/opt-125m": {"num_layer": 12, "d": 768, "h": 12, "neurons": 3072},
    # ---------------- LLaMA-derived sparse models ----------------
    "SparseLLM/ReluLLaMA-7B": {"num_layer": 32, "d": 4096, "h": 32, "neurons": 11008},
    "SparseLLM/prosparse-llama-2-7b": {"num_layer": 32, "d": 4096, "h": 32, "neurons": 11008},
    # ---------------- Llama 2 ----------------
    "meta-llama/Llama-2-7b-hf": {
        "num_layer": 32,
        "d": 4096,
        "h": 32,
        "neurons": 11008,
        "layer_imp": "results/attn_importance/Llama-2-7b-hf_attn_importance.json",
    },
    "meta-llama/Llama-2-13b-hf": {
        "num_layer": 40,
        "d": 5120,
        "h": 40,
        "neurons": 13824,
        "layer_imp": "results/attn_importance/Llama-2-13b-hf_attn_importance.json",
    },
    # ---------------- Llama 3.1 ----------------
    "meta-llama/Llama-3.1-8B": {
        "num_layer": 32,
        "d": 4096,
        "h": 32,
        "neurons": 14336,
        "layer_imp": "results/attn_importance/Llama-3.1-8B_attn_importance.json",
    },
    "meta-llama/Llama-3.1-70B": {
        "num_layer": 80,
        "d": 8192,
        "h": 64,
        "neurons": 28672,
        "layer_imp": "results/attn_importance/Llama-3.1-70B_attn_importance.json",
    },
}
# This class is used to store the activation thresholds for each layer of the model. Used for evaluation purposes.
class ActivationThresholds:
    """
    Per-layer threshold container.

    Two dictionaries keyed by layer index are kept:
        activation_threshold: initialised to ``attn_th`` for every layer.
        mlp_threshold: initialised to ``mlp_th`` for every layer.
    The setter, getter, save and load methods below operate on
    ``activation_threshold`` only.
    """

    def __init__(self, num_layers, attn_th=0.0, mlp_th=0.0):
        """
        Parameters:
            num_layers (int): Number of layers; indices 0..num_layers-1 are created.
            attn_th (float): Initial value stored in ``activation_threshold``.
            mlp_th (float): Initial value stored in ``mlp_threshold``.
        """
        self.activation_threshold = {idx: attn_th for idx in range(num_layers)}
        self.mlp_threshold = {idx: mlp_th for idx in range(num_layers)}

    def set_threshold(self, layer_idx, threshold):
        """Store ``threshold`` as the activation threshold of ``layer_idx``."""
        self.activation_threshold[layer_idx] = threshold

    def get_threshold(self, layer_idx):
        """Return the activation threshold of ``layer_idx`` (KeyError if absent)."""
        return self.activation_threshold[layer_idx]

    def save_thresholds(self, file_path):
        """Write ``activation_threshold`` to ``file_path`` as YAML."""
        with open(file_path, 'w') as file:
            yaml.dump(self.activation_threshold, file)

    def load_thresholds(self, sparsity_map):
        """
        Load the activation thresholds from a given sparsity map.
        Entries already present for other layers are left untouched.
        Parameters:
            sparsity_map (dict): A dictionary containing layer indices and their corresponding activation thresholds.
        """
        self.activation_threshold.update(sparsity_map)

    @classmethod
    def from_file(cls, file_path):
        """
        Class method to create an instance from a YAML file.
        Parameters:
            file_path (str): Path to a YAML file holding a mapping of layer index to threshold
                (the format written by ``save_thresholds``).
        Returns:
            ActivationThresholds: Instance whose ``activation_threshold`` is the loaded mapping and
                whose ``mlp_threshold`` holds the default for as many layers as the file lists.
        Raises:
            ValueError: If the file does not contain a mapping.
        """
        with open(file_path, 'r') as file:
            # NOTE(security): yaml.load with FullLoader is kept for compatibility with
            # existing threshold files; only point this at trusted files.
            loaded = yaml.load(file, Loader=yaml.FullLoader)
        if not isinstance(loaded, dict):
            raise ValueError(
                f"Expected a mapping of layer index to threshold in {file_path}, "
                f"got {type(loaded).__name__}"
            )
        # __init__ requires num_layers; the previous bare cls() call always
        # raised TypeError. Size the instance from the loaded mapping.
        instance = cls(len(loaded))
        instance.activation_threshold = loaded
        return instance
def build_mlp_topk_lookup(data_path: str, batch_size: int, delta: float = 128) -> dict:
    """
    Creates a lookup table from the CSV file 'mlp_act_batch_{batch_size}_stats.csv' in the given directory.
    The lookup maps each layer to a top-k value calculated as:
        top_k = ceil(average_activation + std_activation + delta)
    Only rows whose 'batch_size' column equals ``batch_size`` are used. If a layer
    appears more than once, the last matching row wins.
    Parameters:
        data_path (str): The path to the directory containing the CSV files.
        batch_size (int): The batch size to use in the file name and for filtering.
        delta (float): The additional value added to the activations before rounding.
    Returns:
        dict: A mapping from layer id to computed top-k value (int).
    Raises:
        FileNotFoundError: If the CSV file does not exist in the provided directory.
    """
    full_path = os.path.join(data_path, f"mlp_act_batch_{batch_size}_stats.csv")
    if not os.path.exists(full_path):
        raise FileNotFoundError(f"File not found: {full_path}")

    df = pd.read_csv(full_path)
    selected = df[df["batch_size"] == batch_size]

    # Read values column by column instead of via iterrows(): iterrows() builds
    # one Series per row and upcasts mixed int/float rows, which turned integer
    # layer ids into floats (0 -> 0.0). tolist() keeps each column's own type
    # and yields plain Python scalars.
    layers = selected["layer"].tolist()
    averages = selected["average_activation"].tolist()
    deviations = selected["std_activation"].tolist()

    return {
        layer: int(np.ceil(avg + std + delta))
        for layer, avg, std in zip(layers, averages, deviations)
    }
def _update_hf_mlp_topk(model, mlp_lookup):
"""
Updates the top-k values in the model's MLP layers using the provided lookup table.
Parameters:
model (HybridModel): The model to update.
mlp_lookup (dict): The lookup table mapping layer id to top-k value.
delta (int): The additional value added to the activations before rounding.
"""
for layer_idx, top_k in mlp_lookup.items():
model.model.decoder.layers[layer_idx].mlp_act= int(top_k)
def identify_model_type(model_name):
    """
    Classify a model name as OPT or Llama by case-insensitive substring match.
    The "opt" check runs first, so a name containing both markers is reported as "OPT".
    Args:
        model_name (str): The name of the model.
    Returns:
        str: "OPT" if the name contains "opt", "Llama" if it contains "llama", otherwise "Unknown".
    """
    lowered = model_name.lower()
    # Ordered (marker, label) pairs; the first marker found in the name wins.
    for marker, label in (("opt", "OPT"), ("llama", "Llama")):
        if marker in lowered:
            return label
    return "Unknown"
# OPT model identifiers in a fixed order, smallest listed first. The number
# after each entry is its 1-based position in the list.
OPT_MODELS = [
    "facebook/opt-125m",   # 1
    "facebook/opt-350m",   # 2
    "facebook/opt-1.3b",   # 3
    "facebook/opt-2.7b",   # 4
    "facebook/opt-6.7b",   # 5
    "facebook/opt-13b",    # 6
    "facebook/opt-30b",    # 7
    "facebook/opt-66b",    # 8
]
# OPT-only architecture table, keyed by model identifier.
# Each entry carries 'num_layer', 'd' and 'h'; the opt-175b entry additionally
# carries 'sp_config', set to None.
# NOTE(review): 'd' / 'h' presumably mean hidden size and attention head
# count — confirm against the callers.
OPT_CONFIGS = {
    "facebook/opt-175b": {"num_layer": 95, "sp_config": None, "d": 12288, "h": 96},
    "facebook/opt-66b": {"num_layer": 64, "d": 9216, "h": 72},
    "facebook/opt-30b": {"num_layer": 48, "d": 7168, "h": 56},
    "facebook/opt-13b": {"num_layer": 40, "d": 5120, "h": 40},
    "facebook/opt-6.7b": {"num_layer": 32, "d": 4096, "h": 32},
    "facebook/opt-2.7b": {"num_layer": 32, "d": 2560, "h": 32},
    "facebook/opt-1.3b": {"num_layer": 24, "d": 2048, "h": 32},
    "facebook/opt-350m": {"num_layer": 24, "d": 1024, "h": 16},
    "facebook/opt-125m": {"num_layer": 12, "d": 768, "h": 12},
}
# NOTE: everything between the triple quotes below is a module-level string
# literal, not live code. The plotting helper `plot_average_activation` inside
# it is therefore never defined or executed, and its `import seaborn as sns`
# never runs. The text reads per-directory CSV stats, converts activations to
# percentages of `OPT_CONFIGS[model_name]["d"] * 4`, and plots them per batch
# size with error bars.
'''
import seaborn as sns
def plot_average_activation(directory_path, model_name):
# Read all the .csv files in the specified directory and concatenate them into a single DataFrame
files = [os.path.join(directory_path, f) for f in os.listdir(directory_path) if f.endswith('.csv')]
df = pd.concat([pd.read_csv(f) for f in files])
# Set the seaborn style for better aesthetics
sns.set(style="whitegrid")
total_neurons = OPT_CONFIGS[model_name]["d"] * 4
# Compute the average activation percentage and standard deviation percentage
df['average_activation_percentage'] = (df['average_activation'] / total_neurons) * 100
df['std_activation_percentage'] = (df['std_activation'] / total_neurons) * 100
# Create a color palette with a different color for each batch size
palette = sns.color_palette("husl", df['batch_size'].nunique())
# Initialize the matplotlib figure
plt.figure(figsize=(12, 8))
# Loop over each batch size and plot the average_activation_percentage with error bars
for i, (batch_size, group) in enumerate(df.groupby('batch_size')):
plt.errorbar(
group['layer'],
group['average_activation_percentage'],
yerr=group['std_activation_percentage'],
label=f'Batch Size {batch_size}',
capsize=3,
marker='o',
linestyle='-',
color=palette[i]
)
# Set y-axis ticks at every 10% increment
plt.yticks(range(0, 101, 10)) # Y-ticks from 0% to 100% in steps of 10%
# Shaded region
plt.axhspan(80, plt.ylim()[1], facecolor='gray', alpha=0.1)
# Set the labels and title of the plot
plt.xlabel('Layer', fontsize=14)
plt.ylabel('Average Activation Percentage (%)', fontsize=14)
plt.title(f'Model: {model_name} Average Activation Percentage vs Layer for Different Batch Sizes', fontsize=16)
# Show legend
plt.legend(title='Batch Size', fontsize=12, title_fontsize=12)
# Tight layout for better spacing
plt.tight_layout()
# Save the image
plt.savefig('average_activation_analysis.png')
# Display the plot
plt.show()
'''