|
|
import os |
|
|
import yaml |
|
|
|
|
|
import pandas as pd |
|
|
import matplotlib.pyplot as plt |
|
|
import numpy as np |
|
|
|
|
|
# Hugging Face hub identifiers of the models covered by the experiments,
# ordered smallest-to-largest within each family.
MODELS = [
    "facebook/opt-125m",
    "facebook/opt-350m",
    "facebook/opt-1.3b",
    "facebook/opt-2.7b",
    "facebook/opt-6.7b",
    "facebook/opt-13b",
    "facebook/opt-30b",
    "facebook/opt-66b",
    "SparseLLM/ReluLLaMA-7B",
    "SparseLLM/prosparse-llama-2-7b",
    "meta-llama/Llama-2-7b-hf",
    "meta-llama/Llama-2-13b-hf",
    "-",  # NOTE(review): not a model id — looks like a placeholder/separator; confirm intent
    "meta-llama/Llama-3.1-8B",
    "meta-llama/Llama-3.1-70B",
]
|
|
|
|
|
|
|
|
# Per-model architecture metadata:
#   num_layer — number of transformer decoder blocks
#   d         — hidden (embedding) dimension
#   h         — number of attention heads
#   neurons   — FFN intermediate size
#   layer_imp — (optional) path to precomputed attention-importance scores
CONFIGS = {
    'facebook/opt-175b': {
        'num_layer': 95, 'd': 12288, 'h': 96, 'neurons': 49152,
    },
    'facebook/opt-66b': {
        'num_layer': 64, 'd': 9216, 'h': 72, 'neurons': 36864,
        'layer_imp': "results/attn_importance/opt-66b_attn_importance.json",
    },
    'facebook/opt-30b': {
        'num_layer': 48, 'd': 7168, 'h': 56, 'neurons': 28672,
        'layer_imp': "results/attn_importance/opt-30b_attn_importance.json",
    },
    'facebook/opt-13b': {
        'num_layer': 40, 'd': 5120, 'h': 40, 'neurons': 20480,
        'layer_imp': "results/attn_importance/opt-13b_attn_importance.json",
    },
    'facebook/opt-6.7b': {
        'num_layer': 32, 'd': 4096, 'h': 32, 'neurons': 16384,
        'layer_imp': "results/attn_importance/opt-6.7b_attn_importance.json",
    },
    'facebook/opt-2.7b': {
        'num_layer': 32, 'd': 2560, 'h': 32, 'neurons': 10240,
    },
    'facebook/opt-1.3b': {
        'num_layer': 24, 'd': 2048, 'h': 32, 'neurons': 8192,
    },
    'facebook/opt-350m': {
        'num_layer': 24, 'd': 1024, 'h': 16, 'neurons': 4096,
    },
    'facebook/opt-125m': {
        'num_layer': 12, 'd': 768, 'h': 12, 'neurons': 3072,
    },
    'SparseLLM/ReluLLaMA-7B': {
        'num_layer': 32, 'd': 4096, 'h': 32, 'neurons': 11008,
    },
    'SparseLLM/prosparse-llama-2-7b': {
        'num_layer': 32, 'd': 4096, 'h': 32, 'neurons': 11008,
    },
    'meta-llama/Llama-2-7b-hf': {
        'num_layer': 32, 'd': 4096, 'h': 32, 'neurons': 11008,
        'layer_imp': "results/attn_importance/Llama-2-7b-hf_attn_importance.json",
    },
    'meta-llama/Llama-2-13b-hf': {
        'num_layer': 40, 'd': 5120, 'h': 40, 'neurons': 13824,
        'layer_imp': "results/attn_importance/Llama-2-13b-hf_attn_importance.json",
    },
    'meta-llama/Llama-3.1-8B': {
        'num_layer': 32, 'd': 4096, 'h': 32, 'neurons': 14336,
        'layer_imp': "results/attn_importance/Llama-3.1-8B_attn_importance.json",
    },
    'meta-llama/Llama-3.1-70B': {
        'num_layer': 80, 'd': 8192, 'h': 64, 'neurons': 28672,
        'layer_imp': "results/attn_importance/Llama-3.1-70B_attn_importance.json",
    },
}
|
|
|
|
|
|
|
|
|
|
|
class ActivationThresholds:
    """Per-layer activation thresholds for attention and MLP sparsification.

    Holds one attention threshold and one MLP threshold per layer index,
    with YAML round-tripping for the attention thresholds.
    """

    def __init__(self, num_layers=0, attn_th=0.0, mlp_th=0.0):
        """Initialize every layer's thresholds to the given defaults.

        Parameters:
            num_layers (int): Number of layers to pre-populate. Defaults to 0
                (bug fix: ``from_file`` calls ``cls()`` with no arguments,
                which previously raised TypeError because ``num_layers``
                had no default).
            attn_th (float): Initial attention-activation threshold per layer.
            mlp_th (float): Initial MLP threshold per layer.
        """
        self.activation_threshold = {i: attn_th for i in range(num_layers)}
        self.mlp_threshold = {i: mlp_th for i in range(num_layers)}

    def set_threshold(self, layer_idx, threshold):
        """Set the attention-activation threshold for a single layer."""
        self.activation_threshold[layer_idx] = threshold

    def get_threshold(self, layer_idx):
        """Return the attention-activation threshold for *layer_idx*."""
        return self.activation_threshold[layer_idx]

    def save_thresholds(self, file_path):
        """Serialize the attention thresholds to a YAML file.

        NOTE: only ``activation_threshold`` is persisted; ``mlp_threshold``
        is intentionally not saved (matches original behavior).
        """
        with open(file_path, 'w') as file:
            yaml.dump(self.activation_threshold, file)

    def load_thresholds(self, sparsity_map):
        """
        Load the activation thresholds from a given sparsity map.

        Parameters:
            sparsity_map (dict): Layer indices mapped to their activation
                thresholds; merged into the existing thresholds.
        """
        for layer_idx, threshold in sparsity_map.items():
            self.activation_threshold[layer_idx] = threshold

    @classmethod
    def from_file(cls, file_path):
        """Class method to create an instance from a YAML file.

        The file is expected to be one produced by ``save_thresholds``
        (a plain mapping), so ``yaml.safe_load`` is sufficient and safer
        than ``yaml.load`` with ``FullLoader``.
        """
        with open(file_path, 'r') as file:
            activation_threshold = yaml.safe_load(file)

        instance = cls()
        instance.activation_threshold = activation_threshold
        return instance
|
|
|
|
|
|
|
|
def build_mlp_topk_lookup(data_path: str, batch_size: int, delta: int = 128) -> dict:
    """
    Creates a lookup table from the CSV file 'mlp_act_batch_{batch_size}_stats.csv'
    in the given directory.

    The lookup maps each layer to a top-k value calculated as:
        top_k = ceil(average_activation + std_activation + delta)

    Parameters:
        data_path (str): The path to the directory containing the CSV files.
        batch_size (int): The batch size used in the file name and to filter rows.
        delta (int): Safety margin added to the activations before rounding up
            (default 128).

    Returns:
        dict: A mapping from layer id to computed top-k value (plain ints).

    Raises:
        FileNotFoundError: If the CSV file does not exist in the provided directory.
    """
    file_name = f"mlp_act_batch_{batch_size}_stats.csv"
    full_path = os.path.join(data_path, file_name)

    if not os.path.exists(full_path):
        raise FileNotFoundError(f"File not found: {full_path}")

    df = pd.read_csv(full_path)

    # Defensive filter (the file name already encodes the batch size, but the
    # rows carry it too): done vectorized instead of per-row in Python.
    df = df[df["batch_size"] == batch_size]

    # ceil(avg + std + delta) per layer, vectorized; normalize numpy scalars
    # to plain Python ints so the lookup is framework-agnostic.
    top_k = np.ceil(df["average_activation"] + df["std_activation"] + delta)
    return {int(layer): int(k) for layer, k in zip(df["layer"], top_k)}
|
|
|
|
|
|
|
|
def _update_hf_mlp_topk(model, mlp_lookup): |
|
|
""" |
|
|
Updates the top-k values in the model's MLP layers using the provided lookup table. |
|
|
|
|
|
Parameters: |
|
|
model (HybridModel): The model to update. |
|
|
mlp_lookup (dict): The lookup table mapping layer id to top-k value. |
|
|
delta (int): The additional value added to the activations before rounding. |
|
|
""" |
|
|
for layer_idx, top_k in mlp_lookup.items(): |
|
|
model.model.decoder.layers[layer_idx].mlp_act= int(top_k) |
|
|
|
|
|
|
|
|
def identify_model_type(model_name):
    """
    Identifies if the given model name is an OPT model or a Llama model.

    Args:
        model_name (str): The name of the model.

    Returns:
        str: "OPT" if the model is an OPT model, "Llama" if it's a Llama
        model, or "Unknown". The "opt" substring check runs first, so a
        name containing both substrings is classified as "OPT".
    """
    lowered = model_name.lower()
    if "opt" in lowered:
        return "OPT"
    if "llama" in lowered:
        return "Llama"
    return "Unknown"
|
|
|
|
|
|
|
|
# OPT-family subset of the evaluated models, smallest to largest.
OPT_MODELS = [
    f"facebook/opt-{size}"
    for size in ("125m", "350m", "1.3b", "2.7b", "6.7b", "13b", "30b", "66b")
]
|
|
|
|
|
|
|
|
# OPT-family architecture metadata (num_layer = decoder blocks, d = hidden
# dim, h = attention heads). Only the 175b entry carries 'sp_config'.
OPT_CONFIGS = {
    'facebook/opt-175b': {'num_layer': 95, 'sp_config': None, 'd': 12288, 'h': 96},
    'facebook/opt-66b':  {'num_layer': 64, 'd': 9216, 'h': 72},
    'facebook/opt-30b':  {'num_layer': 48, 'd': 7168, 'h': 56},
    'facebook/opt-13b':  {'num_layer': 40, 'd': 5120, 'h': 40},
    'facebook/opt-6.7b': {'num_layer': 32, 'd': 4096, 'h': 32},
    'facebook/opt-2.7b': {'num_layer': 32, 'd': 2560, 'h': 32},
    'facebook/opt-1.3b': {'num_layer': 24, 'd': 2048, 'h': 32},
    'facebook/opt-350m': {'num_layer': 24, 'd': 1024, 'h': 16},
    'facebook/opt-125m': {'num_layer': 12, 'd': 768, 'h': 12},
}
|
|
|
|
|
# NOTE(review): the block below is dead code — a plotting helper disabled by
# wrapping it in a bare string literal, so it is never executed. It depends
# on seaborn, which is not imported at module level. Kept for reference.
'''
import seaborn as sns
def plot_average_activation(directory_path, model_name):

    # Read all the .csv files in the specified directory and concatenate them into a single DataFrame
    files = [os.path.join(directory_path, f) for f in os.listdir(directory_path) if f.endswith('.csv')]
    df = pd.concat([pd.read_csv(f) for f in files])

    # Set the seaborn style for better aesthetics
    sns.set(style="whitegrid")

    total_neurons = OPT_CONFIGS[model_name]["d"] * 4
    # Compute the average activation percentage and standard deviation percentage
    df['average_activation_percentage'] = (df['average_activation'] / total_neurons) * 100
    df['std_activation_percentage'] = (df['std_activation'] / total_neurons) * 100

    # Create a color palette with a different color for each batch size
    palette = sns.color_palette("husl", df['batch_size'].nunique())

    # Initialize the matplotlib figure
    plt.figure(figsize=(12, 8))

    # Loop over each batch size and plot the average_activation_percentage with error bars
    for i, (batch_size, group) in enumerate(df.groupby('batch_size')):
        plt.errorbar(
            group['layer'],
            group['average_activation_percentage'],
            yerr=group['std_activation_percentage'],
            label=f'Batch Size {batch_size}',
            capsize=3,
            marker='o',
            linestyle='-',
            color=palette[i]
        )

    # Set y-axis ticks at every 10% increment
    plt.yticks(range(0, 101, 10))  # Y-ticks from 0% to 100% in steps of 10%

    # Shaded region
    plt.axhspan(80, plt.ylim()[1], facecolor='gray', alpha=0.1)

    # Set the labels and title of the plot
    plt.xlabel('Layer', fontsize=14)
    plt.ylabel('Average Activation Percentage (%)', fontsize=14)
    plt.title(f'Model: {model_name} Average Activation Percentage vs Layer for Different Batch Sizes', fontsize=16)

    # Show legend
    plt.legend(title='Batch Size', fontsize=12, title_fontsize=12)

    # Tight layout for better spacing
    plt.tight_layout()

    # Save the image
    plt.savefig('average_activation_analysis.png')

    # Display the plot
    plt.show()
'''
|
|
|