import os
import yaml

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

MODELS = [
    "facebook/opt-125m",        # 1
    "facebook/opt-350m",        # 2
    "facebook/opt-1.3b",        # 3
    "facebook/opt-2.7b",        # 4
    "facebook/opt-6.7b",        # 5
    "facebook/opt-13b",         # 6
    "facebook/opt-30b",         # 7
    "facebook/opt-66b",          # 8
    "SparseLLM/ReluLLaMA-7B",   # 9
    "SparseLLM/prosparse-llama-2-7b", # 10
    "meta-llama/Llama-2-7b-hf",     # 11
    "meta-llama/Llama-2-13b-hf",    # 12
    "-",
    "meta-llama/Llama-3.1-8B",      # 14
    "meta-llama/Llama-3.1-70B",     # 15
    
]


CONFIGS = {
    'facebook/opt-175b':{
        'num_layer': 95,
        'd':12288,
        'h': 96,
        'neurons': 49152
    },
    'facebook/opt-66b':{
        'num_layer': 64,
        'd':9216,
        'h': 72,
        'neurons': 36864,
        'layer_imp': "results/attn_importance/opt-66b_attn_importance.json"
    },
    'facebook/opt-30b':{
        'num_layer': 48,
        'd':7168,
        'h': 56,
        'neurons': 28672,
        'layer_imp': "results/attn_importance/opt-30b_attn_importance.json"
    },
    'facebook/opt-13b':{
        'num_layer': 40,
        'd':5120,
        'h': 40,
        'neurons': 20480,
        'layer_imp': "results/attn_importance/opt-13b_attn_importance.json"
    },
    'facebook/opt-6.7b':{
        'num_layer': 32,
        'd':4096,
        'h': 32,
        'neurons': 16384,
        'layer_imp': "results/attn_importance/opt-6.7b_attn_importance.json"
    },
    'facebook/opt-2.7b':{
        'num_layer': 32,
        'd':2560,
        'h': 32,
        'neurons': 10240
    },
    'facebook/opt-1.3b':{
        'num_layer': 24,
        'd':2048,
        'h': 32,
        'neurons': 8192
    },
    'facebook/opt-350m':{
        'num_layer': 24,
        'd':1024,
        'h': 16,
        'neurons': 4096
    },
    'facebook/opt-125m':{
        'num_layer': 12,
        'd':768,
        'h': 12,
        'neurons': 3072
    },
    'SparseLLM/ReluLLaMA-7B':{
        'num_layer': 32,
        'd': 4096,
        'h': 32,
        'neurons': 11008
    },
    'SparseLLM/prosparse-llama-2-7b':{
        'num_layer': 32,
        'd': 4096,
        'h': 32,
        'neurons': 11008
    },
    'meta-llama/Llama-2-7b-hf':{
        'num_layer': 32,
        'd': 4096,
        'h': 32,
        'neurons': 11008,
        'layer_imp': "results/attn_importance/Llama-2-7b-hf_attn_importance.json"
    },
    'meta-llama/Llama-2-13b-hf':{
        'num_layer': 40,
        'd': 5120,
        'h': 40,
        'neurons': 13824,
        'layer_imp': "results/attn_importance/Llama-2-13b-hf_attn_importance.json"
    },
    'meta-llama/Llama-3.1-8B':{
        'num_layer': 32,
        'd': 4096,
        'h': 32,
        'neurons': 14336,
        'layer_imp': "results/attn_importance/Llama-3.1-8B_attn_importance.json"
    },
    'meta-llama/Llama-3.1-70B':{
        'num_layer': 80,
        'd': 8192,
        'h': 64,
        'neurons': 28672,
        'layer_imp': "results/attn_importance/Llama-3.1-70B_attn_importance.json"
    }
    
}


# This class is used to store the activation thresholds for each layer of the model. Used for evaluation purposes.
class ActivationThresholds:
    def __init__(self, num_layers, attn_th=0.0, mlp_th = 0.0):
        self.activation_threshold = {}
        self.mlp_threshold = {}
        for i in range(num_layers):
            self.activation_threshold[i] = attn_th
            self.mlp_threshold[i] = mlp_th

    def set_threshold(self, layer_idx, threshold):
        self.activation_threshold[layer_idx] = threshold

    def get_threshold(self, layer_idx):
        return self.activation_threshold[layer_idx]

    def save_thresholds(self, file_path):
        with open(file_path, 'w') as file:
            documents = yaml.dump(self.activation_threshold, file)

    # def load_thresholds(self, file_path):
    #     with open(file_path, 'r') as file:
    #         self.activation_threshold = yaml.load(file, Loader=yaml.FullLoader)
    
    def load_thresholds(self, sparsity_map):
        """
        Load the activation thresholds from a given sparsity map.
        
        Parameters:
            sparsity_map (dict): A dictionary containing layer indices and their corresponding activation thresholds.
        """
        for layer_idx, threshold in sparsity_map.items():
            self.activation_threshold[layer_idx] = threshold
    
    @classmethod
    def from_file(cls, file_path):
        """Class method to create an instance from a YAML file."""
        with open(file_path, 'r') as file:
            activation_threshold = yaml.load(file, Loader=yaml.FullLoader)
        
        # Create an instance and set its activation_threshold
        instance = cls()
        instance.activation_threshold = activation_threshold
        return instance
    
    
def build_mlp_topk_lookup(data_path: str, batch_size: int, delta: int = 128) -> dict:
    """
    Creates a lookup table from the CSV file 'mlp_act_batch_{batch_size}_stats.csv' in the given directory.
    
    The lookup maps each layer to a top-k value calculated as:
      top_k = ceil((average_activation + std_activation + delta))
    
    Parameters:
        data_path (str): The path to the directory containing the CSV files.
        batch_size (int): The batch size to use in the file name and for filtering.
        delta (float): The additional value added to the activations before rounding.
    
    Returns:
        dict: A mapping from layer id to computed top-k value.
    
    Raises:
        FileNotFoundError: If the CSV file does not exist in the provided directory.
    """
    file_name = f"mlp_act_batch_{batch_size}_stats.csv"
    full_path = os.path.join(data_path, file_name)
    
    if not os.path.exists(full_path):
        raise FileNotFoundError(f"File not found: {full_path}")
    
    # Read the CSV file into a DataFrame
    df = pd.read_csv(full_path)
    
    def calc_top_k(avg, std, delta):
        raw_value = avg + std + delta
        # return int(np.ceil(raw_value /128) * 128)    # Round up to the nearest multiple of 128
        return int(np.ceil(raw_value))
    
    mlp_lookup = {
        row["layer"]: calc_top_k(row["average_activation"], row["std_activation"], delta)
        for _, row in df.iterrows() if row["batch_size"] == batch_size
    }
    
    return mlp_lookup


def _update_hf_mlp_topk(model, mlp_lookup):
    """
    Updates the top-k values in the model's MLP layers using the provided lookup table.
    
    Parameters:
        model (HybridModel): The model to update.
        mlp_lookup (dict): The lookup table mapping layer id to top-k value.
        delta (int): The additional value added to the activations before rounding.
    """
    for layer_idx, top_k in mlp_lookup.items():
        model.model.decoder.layers[layer_idx].mlp_act= int(top_k)


def identify_model_type(model_name):
    """
    Identifies if the given model name is an OPT model or a Llama model.

    Args:
        model_name (str): The name of the model.

    Returns:
        str: "OPT" if the model is an OPT model, "Llama" if it's a Llama model, or "Unknown".
    """
    if "opt" in model_name.lower():
        return "OPT"
    elif "llama" in model_name.lower():
        return "Llama"
    else:
        return "Unknown"


OPT_MODELS = [
    "facebook/opt-125m",    # 1
    "facebook/opt-350m",    # 2
    "facebook/opt-1.3b",    # 3
    "facebook/opt-2.7b",    # 4
    "facebook/opt-6.7b",    # 5
    "facebook/opt-13b",     # 6
    "facebook/opt-30b",     # 7
    "facebook/opt-66b"      # 8
]


OPT_CONFIGS = {
    'facebook/opt-175b':{
        'num_layer': 95,
        'sp_config': None,
        'd':12288,
        'h': 96,
    },
    'facebook/opt-66b':{
        'num_layer': 64,
        'd':9216,
        'h': 72,
    },
    'facebook/opt-30b':{
        'num_layer': 48,
        'd':7168,
        'h': 56,
    },
    'facebook/opt-13b':{
        'num_layer': 40,
        'd':5120,
        'h': 40,
    },
    'facebook/opt-6.7b':{
        'num_layer': 32,
        'd':4096,
        'h': 32,
    },
    'facebook/opt-2.7b':{
        'num_layer': 32,
        'd':2560,
        'h': 32,
    },
    'facebook/opt-1.3b':{
        'num_layer': 24,
        'd':2048,
        'h': 32,
    },
    'facebook/opt-350m':{
        'num_layer': 24,
        'd':1024,
        'h': 16,
    },
    'facebook/opt-125m':{
        'num_layer': 12,
        'd':768,
        'h': 12,
    },
}

'''
import seaborn as sns
def plot_average_activation(directory_path, model_name):


    # Read all the .csv files in the specified directory and concatenate them into a single DataFrame
    files = [os.path.join(directory_path, f) for f in os.listdir(directory_path) if f.endswith('.csv')]
    df = pd.concat([pd.read_csv(f) for f in files])
    
    # Set the seaborn style for better aesthetics
    sns.set(style="whitegrid")
    
    total_neurons = OPT_CONFIGS[model_name]["d"] * 4
    # Compute the average activation percentage and standard deviation percentage
    df['average_activation_percentage'] = (df['average_activation'] / total_neurons) * 100
    df['std_activation_percentage'] = (df['std_activation'] / total_neurons) * 100

    # Create a color palette with a different color for each batch size
    palette = sns.color_palette("husl", df['batch_size'].nunique())

    # Initialize the matplotlib figure
    plt.figure(figsize=(12, 8))

    # Loop over each batch size and plot the average_activation_percentage with error bars
    for i, (batch_size, group) in enumerate(df.groupby('batch_size')):
        plt.errorbar(
            group['layer'],
            group['average_activation_percentage'],
            yerr=group['std_activation_percentage'],
            label=f'Batch Size {batch_size}',
            capsize=3,
            marker='o',
            linestyle='-',
            color=palette[i]
        )
        
    # Set y-axis ticks at every 10% increment
    plt.yticks(range(0, 101, 10))  # Y-ticks from 0% to 100% in steps of 10%
    
    # Shaded region
    plt.axhspan(80, plt.ylim()[1], facecolor='gray', alpha=0.1)
    
    # Set the labels and title of the plot
    plt.xlabel('Layer', fontsize=14)
    plt.ylabel('Average Activation Percentage (%)', fontsize=14)
    plt.title(f'Model: {model_name} Average Activation Percentage vs Layer for Different Batch Sizes', fontsize=16)
    
    # Show legend
    plt.legend(title='Batch Size', fontsize=12, title_fontsize=12)
    
    # Tight layout for better spacing
    plt.tight_layout()
    
    # Save the image
    plt.savefig('average_activation_analysis.png')
    
    # Display the plot
    plt.show()
    

'''