|
|
import os |
|
|
import yaml |
|
|
|
|
|
import pandas as pd |
|
|
import matplotlib.pyplot as plt |
|
|
import numpy as np |
|
|
|
|
|
# Hugging Face hub identifiers of the models covered by the experiments,
# ordered smallest-to-largest within each family.
MODELS = [
    "facebook/opt-125m",
    "facebook/opt-350m",
    "facebook/opt-1.3b",
    "facebook/opt-2.7b",
    "facebook/opt-6.7b",
    "facebook/opt-13b",
    "facebook/opt-30b",
    "facebook/opt-66b",
    "SparseLLM/ReluLLaMA-7B",
    "SparseLLM/prosparse-llama-2-7b",
    "meta-llama/Llama-2-7b-hf",
    "meta-llama/Llama-2-13b-hf",
    "-",  # NOTE(review): not a model id — looks like a placeholder/separator; confirm intent
    "meta-llama/Llama-3.1-8B",
    "meta-llama/Llama-3.1-70B",
]
|
|
|
|
|
|
|
|
# Per-model architecture metadata:
#   num_layer — number of transformer decoder blocks
#   d         — hidden (embedding) dimension
#   h         — number of attention heads
#   neurons   — FFN intermediate size
#   layer_imp — (optional) path to precomputed attention-importance scores
CONFIGS = {
    'facebook/opt-175b': {
        'num_layer': 95, 'd': 12288, 'h': 96, 'neurons': 49152,
    },
    'facebook/opt-66b': {
        'num_layer': 64, 'd': 9216, 'h': 72, 'neurons': 36864,
        'layer_imp': "results/attn_importance/opt-66b_attn_importance.json",
    },
    'facebook/opt-30b': {
        'num_layer': 48, 'd': 7168, 'h': 56, 'neurons': 28672,
        'layer_imp': "results/attn_importance/opt-30b_attn_importance.json",
    },
    'facebook/opt-13b': {
        'num_layer': 40, 'd': 5120, 'h': 40, 'neurons': 20480,
        'layer_imp': "results/attn_importance/opt-13b_attn_importance.json",
    },
    'facebook/opt-6.7b': {
        'num_layer': 32, 'd': 4096, 'h': 32, 'neurons': 16384,
        'layer_imp': "results/attn_importance/opt-6.7b_attn_importance.json",
    },
    'facebook/opt-2.7b': {
        'num_layer': 32, 'd': 2560, 'h': 32, 'neurons': 10240,
    },
    'facebook/opt-1.3b': {
        'num_layer': 24, 'd': 2048, 'h': 32, 'neurons': 8192,
    },
    'facebook/opt-350m': {
        'num_layer': 24, 'd': 1024, 'h': 16, 'neurons': 4096,
    },
    'facebook/opt-125m': {
        'num_layer': 12, 'd': 768, 'h': 12, 'neurons': 3072,
    },
    'SparseLLM/ReluLLaMA-7B': {
        'num_layer': 32, 'd': 4096, 'h': 32, 'neurons': 11008,
    },
    'SparseLLM/prosparse-llama-2-7b': {
        'num_layer': 32, 'd': 4096, 'h': 32, 'neurons': 11008,
    },
    'meta-llama/Llama-2-7b-hf': {
        'num_layer': 32, 'd': 4096, 'h': 32, 'neurons': 11008,
        'layer_imp': "results/attn_importance/Llama-2-7b-hf_attn_importance.json",
    },
    'meta-llama/Llama-2-13b-hf': {
        'num_layer': 40, 'd': 5120, 'h': 40, 'neurons': 13824,
        'layer_imp': "results/attn_importance/Llama-2-13b-hf_attn_importance.json",
    },
    'meta-llama/Llama-3.1-8B': {
        'num_layer': 32, 'd': 4096, 'h': 32, 'neurons': 14336,
        'layer_imp': "results/attn_importance/Llama-3.1-8B_attn_importance.json",
    },
    'meta-llama/Llama-3.1-70B': {
        'num_layer': 80, 'd': 8192, 'h': 64, 'neurons': 28672,
        'layer_imp': "results/attn_importance/Llama-3.1-70B_attn_importance.json",
    },
}
|
|
|
|
|
|
|
|
|
|
|
class ActivationThresholds:
    """Per-layer activation thresholds for attention and MLP sparsification.

    Holds one attention threshold and one MLP threshold per layer index,
    with YAML round-tripping for the attention thresholds.
    """

    def __init__(self, num_layers=0, attn_th=0.0, mlp_th=0.0):
        """Initialize every layer's thresholds to the given defaults.

        Parameters:
            num_layers (int): Number of layers to pre-populate. Defaults to 0
                (bug fix: ``from_file`` calls ``cls()`` with no arguments,
                which previously raised TypeError because ``num_layers``
                had no default).
            attn_th (float): Initial attention-activation threshold per layer.
            mlp_th (float): Initial MLP threshold per layer.
        """
        self.activation_threshold = {i: attn_th for i in range(num_layers)}
        self.mlp_threshold = {i: mlp_th for i in range(num_layers)}

    def set_threshold(self, layer_idx, threshold):
        """Set the attention-activation threshold for a single layer."""
        self.activation_threshold[layer_idx] = threshold

    def get_threshold(self, layer_idx):
        """Return the attention-activation threshold for *layer_idx*."""
        return self.activation_threshold[layer_idx]

    def save_thresholds(self, file_path):
        """Serialize the attention thresholds to a YAML file.

        NOTE: only ``activation_threshold`` is persisted; ``mlp_threshold``
        is intentionally not saved (matches original behavior).
        """
        with open(file_path, 'w') as file:
            yaml.dump(self.activation_threshold, file)

    def load_thresholds(self, sparsity_map):
        """
        Load the activation thresholds from a given sparsity map.

        Parameters:
            sparsity_map (dict): Layer indices mapped to their activation
                thresholds; merged into the existing thresholds.
        """
        for layer_idx, threshold in sparsity_map.items():
            self.activation_threshold[layer_idx] = threshold

    @classmethod
    def from_file(cls, file_path):
        """Class method to create an instance from a YAML file.

        The file is expected to be one produced by ``save_thresholds``
        (a plain mapping), so ``yaml.safe_load`` is sufficient and safer
        than ``yaml.load`` with ``FullLoader``.
        """
        with open(file_path, 'r') as file:
            activation_threshold = yaml.safe_load(file)

        instance = cls()
        instance.activation_threshold = activation_threshold
        return instance
|
|
|
|
|
|
|
|
def build_mlp_topk_lookup(data_path: str, batch_size: int, delta: int = 128) -> dict:
    """
    Creates a lookup table from the CSV file 'mlp_act_batch_{batch_size}_stats.csv'
    in the given directory.

    The lookup maps each layer to a top-k value calculated as:
        top_k = ceil(average_activation + std_activation + delta)

    Parameters:
        data_path (str): The path to the directory containing the CSV files.
        batch_size (int): The batch size used in the file name and to filter rows.
        delta (int): Safety margin added to the activations before rounding up
            (default 128).

    Returns:
        dict: A mapping from layer id to computed top-k value (plain ints).

    Raises:
        FileNotFoundError: If the CSV file does not exist in the provided directory.
    """
    file_name = f"mlp_act_batch_{batch_size}_stats.csv"
    full_path = os.path.join(data_path, file_name)

    if not os.path.exists(full_path):
        raise FileNotFoundError(f"File not found: {full_path}")

    df = pd.read_csv(full_path)

    # Defensive filter (the file name already encodes the batch size, but the
    # rows carry it too): done vectorized instead of per-row in Python.
    df = df[df["batch_size"] == batch_size]

    # ceil(avg + std + delta) per layer, vectorized; normalize numpy scalars
    # to plain Python ints so the lookup is framework-agnostic.
    top_k = np.ceil(df["average_activation"] + df["std_activation"] + delta)
    return {int(layer): int(k) for layer, k in zip(df["layer"], top_k)}
|
|
|
|
|
|
|
|
def _update_hf_mlp_topk(model, mlp_lookup): |
|
|
""" |
|
|
Updates the top-k values in the model's MLP layers using the provided lookup table. |
|
|
|
|
|
Parameters: |
|
|
model (HybridModel): The model to update. |
|
|
mlp_lookup (dict): The lookup table mapping layer id to top-k value. |
|
|
delta (int): The additional value added to the activations before rounding. |
|
|
""" |
|
|
for layer_idx, top_k in mlp_lookup.items(): |
|
|
model.model.decoder.layers[layer_idx].mlp_act= int(top_k) |
|
|
|
|
|
|
|
|
def identify_model_type(model_name):
    """
    Identifies if the given model name is an OPT model or a Llama model.

    Args:
        model_name (str): The name of the model.

    Returns:
        str: "OPT" if the model is an OPT model, "Llama" if it's a Llama
        model, or "Unknown". The "opt" substring check runs first, so a
        name containing both substrings is classified as "OPT".
    """
    lowered = model_name.lower()
    if "opt" in lowered:
        return "OPT"
    if "llama" in lowered:
        return "Llama"
    return "Unknown"
|
|
|
|
|
|
|
|
# OPT-family subset of the evaluated models, smallest to largest.
OPT_MODELS = [
    f"facebook/opt-{size}"
    for size in ("125m", "350m", "1.3b", "2.7b", "6.7b", "13b", "30b", "66b")
]
|
|
|
|
|
|
|
|
# OPT-family architecture metadata (num_layer = decoder blocks, d = hidden
# dim, h = attention heads). Only the 175b entry carries 'sp_config'.
OPT_CONFIGS = {
    'facebook/opt-175b': {'num_layer': 95, 'sp_config': None, 'd': 12288, 'h': 96},
    'facebook/opt-66b':  {'num_layer': 64, 'd': 9216, 'h': 72},
    'facebook/opt-30b':  {'num_layer': 48, 'd': 7168, 'h': 56},
    'facebook/opt-13b':  {'num_layer': 40, 'd': 5120, 'h': 40},
    'facebook/opt-6.7b': {'num_layer': 32, 'd': 4096, 'h': 32},
    'facebook/opt-2.7b': {'num_layer': 32, 'd': 2560, 'h': 32},
    'facebook/opt-1.3b': {'num_layer': 24, 'd': 2048, 'h': 32},
    'facebook/opt-350m': {'num_layer': 24, 'd': 1024, 'h': 16},
    'facebook/opt-125m': {'num_layer': 12, 'd': 768, 'h': 12},
}
|
|
|
|
|
# NOTE(review): the block below is dead code — a plotting helper disabled by
# wrapping it in a bare string literal, so it is never executed. It depends
# on seaborn, which is not imported at module level. Kept for reference.
'''
import seaborn as sns
def plot_average_activation(directory_path, model_name):

    # Read all the .csv files in the specified directory and concatenate them into a single DataFrame
    files = [os.path.join(directory_path, f) for f in os.listdir(directory_path) if f.endswith('.csv')]
    df = pd.concat([pd.read_csv(f) for f in files])

    # Set the seaborn style for better aesthetics
    sns.set(style="whitegrid")

    total_neurons = OPT_CONFIGS[model_name]["d"] * 4
    # Compute the average activation percentage and standard deviation percentage
    df['average_activation_percentage'] = (df['average_activation'] / total_neurons) * 100
    df['std_activation_percentage'] = (df['std_activation'] / total_neurons) * 100

    # Create a color palette with a different color for each batch size
    palette = sns.color_palette("husl", df['batch_size'].nunique())

    # Initialize the matplotlib figure
    plt.figure(figsize=(12, 8))

    # Loop over each batch size and plot the average_activation_percentage with error bars
    for i, (batch_size, group) in enumerate(df.groupby('batch_size')):
        plt.errorbar(
            group['layer'],
            group['average_activation_percentage'],
            yerr=group['std_activation_percentage'],
            label=f'Batch Size {batch_size}',
            capsize=3,
            marker='o',
            linestyle='-',
            color=palette[i]
        )

    # Set y-axis ticks at every 10% increment
    plt.yticks(range(0, 101, 10))  # Y-ticks from 0% to 100% in steps of 10%

    # Shaded region
    plt.axhspan(80, plt.ylim()[1], facecolor='gray', alpha=0.1)

    # Set the labels and title of the plot
    plt.xlabel('Layer', fontsize=14)
    plt.ylabel('Average Activation Percentage (%)', fontsize=14)
    plt.title(f'Model: {model_name} Average Activation Percentage vs Layer for Different Batch Sizes', fontsize=16)

    # Show legend
    plt.legend(title='Batch Size', fontsize=12, title_fontsize=12)

    # Tight layout for better spacing
    plt.tight_layout()

    # Save the image
    plt.savefig('average_activation_analysis.png')

    # Display the plot
    plt.show()
'''
|
|
|