import math
import yaml
import pandas as pd
from pathlib import Path

# AUP calculation (from d3LLM_Code/aup_utils.py)
def weight_function(y: float, y_max: float, alpha: float = 3.0) -> float:
    """Return the quality weight W(y) = min(exp(-alpha * (1 - y / y_max)), 1).

    Args:
        y: Score being weighted.
        y_max: Reference score that ``y`` is divided by; must be non-zero.
        alpha: Multiplier applied to ``1 - y / y_max`` inside the exponent.

    Returns:
        The weight, never greater than 1.0.

    Raises:
        ZeroDivisionError: If ``y_max`` is zero.
    """
    exponent = -alpha * (1 - y / y_max)
    # exp() of a non-negative exponent is >= 1 and would be capped to 1.0
    # anyway; returning early also avoids OverflowError from math.exp when
    # the exponent is very large (y far above y_max).
    if exponent >= 0:
        return 1.0
    return math.exp(exponent)

def get_aup(rho: list, y: list, y_max: float, alpha: float = 3.0, y_min_offset: float = 5.0) -> float:
    """Compute the AUP (Accuracy Under Parallelism) score of one curve.

    The (rho, y) points are ordered by rho. Points whose y falls below
    ``y_of_lowest_rho - y_min_offset`` are discarded. The score starts at
    ``rho * y`` of the first kept point and then adds, for each consecutive
    pair of kept points, a trapezoid over rho of the weighted accuracy
    ``y * weight_function(y, y_max, alpha)``.

    Args:
        rho: Parallelism values, one per point.
        y: Accuracy values, paired positionally with ``rho``.
        y_max: Reference accuracy passed to ``weight_function``.
        alpha: Weighting parameter passed to ``weight_function``.
        y_min_offset: Allowed accuracy drop below the lowest-rho point.

    Returns:
        The AUP score, or 0.0 when ``rho`` is empty.
    """
    if len(rho) == 0:
        return 0.0

    # Order by rho only; ties keep their input order (stable sort).
    ordered = sorted(zip(rho, y), key=lambda point: point[0])
    rho_sorted, acc_sorted = (list(column) for column in zip(*ordered))

    # Accuracy floor is relative to the point with the smallest rho.
    floor = acc_sorted[0] - y_min_offset
    kept = [(r, acc) for r, acc in ordered if acc >= floor]
    if not kept:
        return rho_sorted[0] * acc_sorted[0]

    first_rho, first_acc = kept[0]
    total = first_rho * first_acc  # the first point's term is not weighted
    for (rho_prev, acc_prev), (rho_cur, acc_cur) in zip(kept, kept[1:]):
        w_cur = weight_function(acc_cur, y_max, alpha)
        w_prev = weight_function(acc_prev, y_max, alpha)
        total += 0.5 * (rho_cur - rho_prev) * (acc_cur * w_cur + acc_prev * w_prev)
    return total

# Benchmark YAML files live in "d3LLM_Code", located two levels above the
# directory that contains this module.
DATA_DIR = Path(__file__).parent.parent.parent.joinpath("d3LLM_Code")
DATA_FILES = [
    "data_dream.yaml",
    "data_llada.yaml",
    "data_dream_coder.yaml",
]

# Task-name handling: the "-Instruct" variants are reported under their base
# task name, and the "+" variants are left out entirely.
TASK_MERGE = {
    "HumanEval-Instruct": "HumanEval",
    "MBPP-Instruct": "MBPP",
}
TASK_EXCLUDE = {
    "HumanEval+",
    "MBPP+",
}
# Order in which tasks are reported.
TASK_ORDER = [
    "GSM8K-CoT",
    "MATH",
    "MBPP",
    "HumanEval",
    "Long-GSM8K",
]
# Fixed denominator for the average AUP: the summed AUP is always divided by
# this value, regardless of how many tasks a method has results for.
AVG_AUP_DIVISOR = 5

def load_yaml_data():
    """Load the benchmark YAML files and compute per-method, per-task metrics.

    Every file named in ``DATA_FILES`` is looked up under ``DATA_DIR`` and
    processed independently. Files that do not exist, are empty, or do not
    contain a top-level mapping are skipped. Within a file, ``y_max`` for a
    task is the largest accuracy over all methods of that task in that file
    only; results are merged across files afterwards. Task names are mapped
    through ``TASK_MERGE`` and tasks in ``TASK_EXCLUDE`` are ignored. Tasks
    and methods without any data points are skipped.

    Returns:
        A 4-tuple ``(all_results, all_meta, ordered_tasks, raw_data)``:

        * ``all_results``: ``{method: {task: (aup, tpf, acc)}}`` where ``aup``
          is rounded to 1 decimal, ``tpf`` (the largest rho of the curve) to
          2 decimals and ``acc`` (the accuracy at that largest rho) to 1.
        * ``all_meta``: the ``_meta`` mappings of all files merged together.
        * ``ordered_tasks``: the tasks from ``TASK_ORDER`` that were seen.
        * ``raw_data``: ``{task: {method: pairs}}`` with the unprocessed
          (rho, y) pairs, kept for curve plotting.
    """
    all_results = {}  # {method: {task: (aup, tpf, acc)}}
    all_meta = {}
    all_tasks = set()
    raw_data = {}  # {task: {method: [(rho, y), ...]}} for curve plotting

    for filename in DATA_FILES:
        filepath = DATA_DIR / filename
        if not filepath.exists():
            continue
        with open(filepath, 'r', encoding='utf-8') as f:
            data = yaml.safe_load(f)
        # safe_load returns None for an empty document; anything that is not
        # a mapping cannot hold task entries, so skip it like a missing file.
        if not isinstance(data, dict):
            continue

        # Tolerate an explicit null `_meta` entry.
        all_meta.update(data.pop('_meta', None) or {})

        for task, methods in data.items():
            if task in TASK_EXCLUDE or not methods:
                continue

            # y_max is computed per task WITHIN this file only.
            scores = [y for pairs in methods.values() for _, y in (pairs or ())]
            if not scores:
                continue  # no data points at all for this task
            y_max = max(scores)

            target_task = TASK_MERGE.get(task, task)
            all_tasks.add(target_task)
            task_curves = raw_data.setdefault(target_task, {})

            for method, pairs in methods.items():
                if not pairs:
                    continue  # nothing to score for this method

                rho_list = [p[0] for p in pairs]
                y_list = [p[1] for p in pairs]
                aup = get_aup(rho_list, y_list, y_max)
                tpf = max(rho_list)
                # Accuracy of the first point that reaches the largest rho.
                acc = y_list[rho_list.index(tpf)]
                all_results.setdefault(method, {})[target_task] = (
                    round(aup, 1),
                    round(tpf, 2),
                    round(acc, 1),
                )
                task_curves[method] = pairs

    # Return tasks in specified order
    ordered_tasks = [t for t in TASK_ORDER if t in all_tasks]
    return all_results, all_meta, ordered_tasks, raw_data

def compute_leaderboard():
    """Compute the leaderboard DataFrame from the YAML data.

    One row is built per method with the columns ``Method``, ``Type``,
    ``Foundation``, ``Link``, then ``<task>_AUP`` / ``<task>_TPF`` /
    ``<task>_Acc`` for every task, and finally ``Avg_AUP``. Metrics of tasks a
    method has no result for are ``None``. ``Avg_AUP`` is the sum of the
    method's available AUP values divided by ``AVG_AUP_DIVISOR``, so missing
    tasks contribute zero to the average. Rows are sorted by ``Avg_AUP`` in
    descending order.

    Returns:
        A 3-tuple ``(df, tasks, raw_data)``: the leaderboard DataFrame, the
        ordered task list and the raw curve data, the latter two exactly as
        returned by ``load_yaml_data()``.
    """
    results_dict, meta, tasks, raw_data = load_yaml_data()

    # Explicit column list (same order the row dicts are filled in) so that
    # the frame has an "Avg_AUP" column to sort on even when there are no rows.
    columns = ["Method", "Type", "Foundation", "Link"]
    columns += [f"{task}_{suffix}" for task in tasks for suffix in ("AUP", "TPF", "Acc")]
    columns.append("Avg_AUP")

    results = []
    for method in sorted(results_dict):
        # Tolerate a method whose meta entry is missing or null.
        method_meta = meta.get(method) or {}
        row = {
            "Method": method,
            "Type": method_meta.get("type", "?"),
            "Foundation": method_meta.get("foundation", "?"),
            "Link": method_meta.get("link", ""),
        }

        aup_sum = 0.0
        for task in tasks:
            if task in results_dict[method]:
                aup, tpf, acc = results_dict[method][task]
                row[f"{task}_AUP"], row[f"{task}_TPF"], row[f"{task}_Acc"] = aup, tpf, acc
                aup_sum += aup
            else:
                row[f"{task}_AUP"] = row[f"{task}_TPF"] = row[f"{task}_Acc"] = None

        row["Avg_AUP"] = round(aup_sum / AVG_AUP_DIVISOR, 1)
        results.append(row)

    df = (
        pd.DataFrame(results, columns=columns)
        .sort_values("Avg_AUP", ascending=False)
        .reset_index(drop=True)
    )
    return df, tasks, raw_data

def get_leaderboard_df():
    """Return the leaderboard DataFrame (first item of ``compute_leaderboard()``)."""
    return compute_leaderboard()[0]

def get_tasks():
    """Return the ordered task list (second item of ``compute_leaderboard()``)."""
    return compute_leaderboard()[1]

def get_raw_data():
    """Return the raw curve data (third item of ``compute_leaderboard()``)."""
    return compute_leaderboard()[2]