#!/usr/bin/env python3
"""
Per-neuron activation tracker for LLaMA-2 and Qwen MLP layers.

For every configured model, each decoder layer's MLP ``forward`` is replaced
with a hook that counts, per intermediate neuron, how many token positions
produce a positive gate activation.  One counts file is saved per
(model, language) pair under ``SAVE_FOLDER``.
"""
import gc
import os
from types import MethodType

import torch

# ---------------------- Config ----------------------
BASE_PATH = "/home/khanh/sla/sla_cpt"
ID_BASE_PATH = "./oscar_ids"

# Hard cap on the number of tokens consumed from a single ID file.
MAX_TOKENS_PER_FILE = 99_999_744

RUN_CONFIGS = [
    # {
    #     'name': 'l2-13b',
    #     'model': f'{BASE_PATH}/uccix/checkpoint-4280',
    #     'ids_list': [
    #         {"path": './ids/l2-13b/id.ga.train.l2-13b', "lang": "ga"},
    #         {"path": './ids/l2-13b/id.en.train.l2-13b', "lang": "en"},
    #     ],
    #     'type': 'llama',
    # },
    # {
    #     'name': 'l2-7b',
    #     'model': f'{BASE_PATH}/llama2_7b_full_basque_corpus_grad_clip_1/checkpoint-10200',
    #     'ids_list': [
    #         {"path": './ids/l2-7b/id.eu.train.l2-7b', "lang": "eu"},
    #         {"path": './ids/l2-7b/id.en.train.l2-7b', "lang": "en"},
    #     ],
    #     'type': 'llama',
    # },
    {
        'name': 'q2.5-zh',
        'model': f'{BASE_PATH}/qwen2.5-0.5b_english_wiki_750M_chinese_wikipedia_corpus_2e_240925/checkpoint-2944',
        'ids_list': [
            {"path": f'{ID_BASE_PATH}/q2.5/id.zh.train.qwen2.5-0.5', "lang": "zh"},
            {"path": f'{ID_BASE_PATH}/q2.5/id.en.train.qwen2.5-0.5', "lang": "en"},
        ],
        'type': 'qwen',
    },
    # NOTE(review): the commented entries below originally lacked the ``f``
    # prefix on their path strings ('{ID_BASE_PATH}/...') and would have used
    # literal, non-interpolated paths when re-enabled; fixed here.
    # {
    #     'name': 'q2.5-en+zh',
    #     'model': f'{BASE_PATH}/qwen2.5-0.5b_english_wiki_150M_en_750M_chinese_wikipedia_corpus_2e_240925/checkpoint-3494',
    #     'ids_list': [
    #         {"path": f'{ID_BASE_PATH}/q2.5/id.zh.train.qwen2.5-0.5', "lang": "zh"},
    #         {"path": f'{ID_BASE_PATH}/q2.5/id.en.train.qwen2.5-0.5', "lang": "en"},
    #     ],
    #     'type': 'qwen',
    # },
    # {
    #     'name': 'q2.5-ga',
    #     'model': f'{BASE_PATH}/qwen2.5-0.5b_english_wiki_1.5B_irish_corpus_240925/checkpoint-2854',
    #     'ids_list': [
    #         {"path": f'{ID_BASE_PATH}/q2.5/id.en.train.qwen2.5-0.5', "lang": "en"},
    #         {"path": f'{ID_BASE_PATH}/q2.5/id.ga.train.qwen2.5-0.5', "lang": "ga"},
    #     ],
    #     'type': 'qwen',
    # },
    # {
    #     'name': 'q2.5-en+ga',
    #     'model': f'{BASE_PATH}/qwen2.5-0.5_full_english_corpus_grad_clip_1/checkpoint-3231',
    #     'ids_list': [
    #         {"path": './ids/qwen2.5-0.5/id.en.train.qwen2.5-0.5', "lang": "en"},
    #         {"path": './ids/qwen2.5-0.5/id.ga.train.qwen2.5-0.5', "lang": "ga"},
    #     ],
    #     'type': 'qwen',
    # },
    # {
    #     'name': 'q2.5-eu',
    #     'model': f'{BASE_PATH}/qwen2.5-0.5b_english_wiki_1.5Bbasque_corpus_240925/checkpoint-2424',
    #     'ids_list': [
    #         {"path": f'{ID_BASE_PATH}/q2.5/id.eu.train.qwen2.5-0.5', "lang": "eu"},
    #         {"path": f'{ID_BASE_PATH}/q2.5/id.en.train.qwen2.5-0.5', "lang": "en"},
    #     ],
    #     'type': 'qwen',
    # },
    # {
    #     'name': 'q2.5-en+eu',
    #     'model': f'{BASE_PATH}/qwen2.5-0.5_full_basque_corpus_grad_clip_1/checkpoint-7800',
    #     'ids_list': [
    #         {"path": './ids/qwen2.5-0.5/id.eu.train.qwen2.5-0.5', "lang": "eu"},
    #         {"path": './ids/qwen2.5-0.5/id.en.train.qwen2.5-0.5', "lang": "en"},
    #     ],
    #     # NOTE(review): this entry had no 'type' key and would silently fall
    #     # back to the llama hook; added explicitly.
    #     'type': 'qwen',
    # },
]

SAVE_FOLDER = "new_activations"


# ---------------------- Hook Functions ----------------------
def make_llama_hook(idx, counter):
    """Build a replacement MLP forward for decoder layer *idx* of a LLaMA model.

    The returned function reproduces the fused gate/up SiLU MLP pass of
    vLLM's LlamaMLP and, as a side effect, adds the number of positions
    whose gate activation is positive to ``counter[idx]`` (one count per
    intermediate neuron).

    Args:
        idx: layer index, i.e. the row of ``counter`` to accumulate into.
        counter: int32 tensor of shape (num_layers, intermediate_size),
            updated in place.
    """
    def llama_forward(self, x):
        gate_up, _ = self.gate_up_proj(x)          # (..., 2 * intermediate)
        half = gate_up.size(-1) // 2
        gate = torch.nn.functional.silu(gate_up[..., :half])
        # silu(v) > 0 iff v > 0, so this counts positive pre-activations.
        counter[idx, :] += (gate.float() > 0).sum(dim=0)
        out, _ = self.down_proj(gate * gate_up[..., half:])
        return out
    return llama_forward


def make_qwen_hook(idx, counter):
    """Build a replacement MLP forward for decoder layer *idx* of a Qwen model.

    Same contract as :func:`make_llama_hook`; kept separate so the two model
    families can diverge independently if needed.
    """
    def qwen_forward(self, x):
        gate_up, _ = self.gate_up_proj(x)          # (s, 2h)
        intermediate_size = gate_up.size(-1) // 2
        gate = gate_up[..., :intermediate_size]    # (s, h)
        up = gate_up[..., intermediate_size:]      # (s, h)
        gate_activation = torch.nn.functional.silu(gate)
        counter[idx, :] += (gate_activation > 0).sum(dim=0)
        out, _ = self.down_proj(gate_activation * up)
        return out
    return qwen_forward


# ---------------------- Run All Configs ----------------------
def main():
    """Load each configured model, hook its MLP layers, and save per-language
    activation counts for every ID file."""
    # Deferred: vLLM is heavy and CUDA-bound; keeping the import here lets the
    # hook factories above be imported and unit-tested without vLLM installed.
    from vllm import LLM, SamplingParams

    os.makedirs(SAVE_FOLDER, exist_ok=True)

    for config in RUN_CONFIGS:
        model_name = config['model']
        save_name = config.get('name', model_name)
        model_type = config.get('type', 'llama')
        ids_list = config.get('ids_list', [])

        print(f"\n=== Processing model: {model_name}, type: {model_type} ===")

        # Fail fast on a bad config before paying the model-load cost.
        hook_factory = {'llama': make_llama_hook, 'qwen': make_qwen_hook}.get(model_type)
        if hook_factory is None:
            raise ValueError(f"Unknown model type: {model_type}")

        model = LLM(
            model=model_name,
            tensor_parallel_size=1,
            enforce_eager=True,        # eager mode so the patched forwards run
            trust_remote_code=True
        )

        model_config = model.llm_engine.model_config
        max_length = model_config.max_model_len
        num_layers = model_config.hf_config.num_hidden_layers
        intermediate_size = model_config.hf_config.intermediate_size
        print(f"Layers: {num_layers}, Intermediate size: {intermediate_size}, Max length: {max_length}")

        # One counter row per layer; the hooks accumulate into it in place.
        over_zero = torch.zeros(num_layers, intermediate_size, dtype=torch.int32).to('cuda')

        # Monkey-patch every layer's MLP forward with the counting hook.
        for i in range(num_layers):
            mlp = model.llm_engine.model_executor.driver_worker.model_runner.model.model.layers[i].mlp
            mlp.forward = MethodType(hook_factory(i, over_zero), mlp)

        # Iterate over all ID files for this model.
        for id_dict in ids_list:
            ids_path = id_dict['path']
            lang = id_dict.get('lang', 'unknown')  # used in the output filename

            # BUG FIX: reset counters between ID files so each saved file
            # reflects only its own language rather than the running total
            # of every file processed so far.
            over_zero.zero_()

            print(f"\nLoading IDs from {ids_path} (lang: {lang})...")
            ids = torch.load(ids_path)
            print(f"ID shape: {ids.shape}")

            # Truncate (capped) token stream to a whole number of sequences.
            n_tokens = min(ids.size(0), MAX_TOKENS_PER_FILE) // max_length * max_length
            input_ids = ids[:n_tokens].reshape(-1, max_length)
            print(f"Processing {input_ids.size(0)} sequences of length {max_length}")

            # A single generated token is enough: counting happens during the
            # prompt forward pass.
            print("Running inference...")
            _ = model.generate(
                prompt_token_ids=input_ids.tolist(),
                sampling_params=SamplingParams(max_tokens=1)
            )

            output_path = os.path.join(SAVE_FOLDER, f'activation.{lang}.train.{save_name}.pt')
            torch.save({
                'n': n_tokens,
                'over_zero': over_zero.cpu(),
                'num_layers': num_layers,
                'intermediate_size': intermediate_size
            }, output_path)
            print(f"Saved activation counts to {output_path}")
            print(f"Processed {n_tokens} tokens total")

        print(f"\nActivation analysis complete for model: {save_name}!")

        # Release GPU memory before loading the next model.
        del model
        torch.cuda.empty_cache()
        gc.collect()


if __name__ == "__main__":
    main()