import os
import pandas as pd
import matplotlib.pyplot as plt
from collections import defaultdict
import numpy as np
import json
from scipy import stats
from scipy.stats import norm

# Result variants to load; each name is substituted into the
# 'exp_normal/results_{}' directory template used by get_results().
CASES = ['exact', 'inexact']
# Directory the plotting functions save their PDF figures into.
plot_path = 'visualization/normal_results'
# When True, the histogram figure gets a shared legend at the bottom of the figure.
include_legend = False

def get_results():
    """Load every saved statistic sample into one long-format DataFrame.

    One JSON file is looked up for each combination of case (taken from the
    module-level ``CASES``), dataset, source model and method, at
    ``exp_normal/results_<case>/<dataset>_<model>.<method>.json``.  The
    samples are read from the file's ``predictions -> samples`` entry.  A
    combination whose file does not exist contributes the single placeholder
    sample ``0.0``.

    Returns:
        pandas.DataFrame with the columns ``datasets``, ``models``, ``cases``,
        ``methods`` and ``values``; the per-file sample arrays are exploded so
        that every row carries one individual sample.
    """
    datasets = ["xsum", "squad", "writing"]
    source_models = ["gpt2-xl", "gpt-neo-2.7B"]
    # Maps the method key used in file names to the name stored in the frame.
    methods2 = {'classification': 'AdaDetectGPT'}
    result_dir_template = 'exp_normal/results_{}'

    def _read_samples(result_file):
        # The samples live under predictions -> samples in the result JSON.
        with open(result_file, 'r') as fin:
            return json.load(fin)['predictions']['samples']

    def _samples_for(dataset, model, method, cases):
        res_path = result_dir_template.format(cases)
        result_file = f'{res_path}/{dataset}_{model}.{method}.json'
        if not os.path.exists(result_file):
            # Missing result file: fall back to a single zero placeholder.
            return np.array([0.0])
        return np.array(_read_samples(result_file))

    # One record per (case, dataset, model, method), in that nesting order.
    records = [
        {
            'datasets': dataset,
            'models': model,
            'cases': cases,
            'methods': methods2[method],
            'values': _samples_for(dataset, model, method, cases),
        }
        for cases in CASES
        for dataset in datasets
        for model in source_models
        for method in methods2
    ]

    # Merge the list of records into a single dict of columns (key -> list).
    # No record value is a list here (they are strings or ndarrays), so each
    # value is appended to its column as one cell.
    columns = defaultdict(list)
    for record in records:
        for key, value in record.items():
            columns[key].append(value)

    frame = pd.DataFrame(dict(columns))
    # Turn each ndarray cell into a plain list so explode() can expand it
    # into one row per sample.
    frame['values'] = frame['values'].apply(lambda samples: samples.tolist())
    return frame.explode('values').reset_index(drop=True)

def _report_normality(samples, model, case, dataset):
    """Print KS, Shapiro-Wilk and Anderson-Darling diagnostics for one series.

    Args:
        samples: 1-D float array of statistics.
        model, case, dataset: labels identifying the series in the output.
    """
    print(f"------------ Test on {model} ({case}) at {dataset} ------------")
    if samples.size < 3:
        # scipy.stats.shapiro requires at least three observations; this also
        # covers series that are empty or hold a single placeholder value.
        print(f"skipped: only {samples.size} sample(s)")
    else:
        test_result = stats.kstest(samples, stats.norm.cdf)
        print("KS test: ", test_result.pvalue)
        test_result = stats.shapiro(samples)
        print("SW test: ", test_result.pvalue)
        test_result = stats.anderson(samples, dist='norm')
        print("Anderson test: ", test_result.statistic, test_result.critical_values, test_result.significance_level)
    print("------------------------------------------------------")


def plot_hist_by_dataset_and_method(df):
    """Plot density histograms of the statistics against the standard normal pdf.

    The figure has one row per method and one column per dataset.  Each panel
    overlays one histogram per (model, case) combination, with the cases
    taken from the module-level ``CASES``, plus the N(0, 1) density as a
    dashed reference curve.

    Args:
        df: DataFrame with the columns ``datasets``, ``models``, ``methods``,
            ``cases`` and ``values`` (one numeric sample per row).

    Side effects:
        Prints normality diagnostics for every (model, case) series, saves
        the figure to ``<plot_path>/normal_histogram.pdf`` (creating the
        directory if needed) and shows it.
    """
    datasets = sorted(df['datasets'].unique())
    models = sorted(df['models'].unique())
    methods = sorted(df['methods'].unique())

    hist_range = (-3, 3)
    base_colors = ['#0571b0', '#f4a582']

    # squeeze=False always yields a 2-D axes array, so single-row and
    # single-column layouts need no special casing.
    fig, axes = plt.subplots(
        nrows=len(methods),
        ncols=len(datasets),
        figsize=(3.9 * len(datasets), 3.3 * len(methods)),
        sharey=False,
        squeeze=False,
    )

    # Standard normal reference curve, identical for every panel.
    x = np.linspace(hist_range[0], hist_range[1], 200)
    y = norm.pdf(x, loc=0, scale=1)

    for i, method in enumerate(methods):
        for j, dataset in enumerate(datasets):
            ax = axes[i][j]
            sub = df[(df['methods'] == method) & (df['datasets'] == dataset)]

            series = []
            labels = []
            for model in models:
                per_model = sub[sub['models'] == model]
                for case in CASES:
                    # Cast explicitly to float64: float16 would quantize the
                    # samples (and overflow large ones) before testing.
                    samples = per_model.loc[
                        per_model['cases'] == case, 'values'
                    ].to_numpy(dtype=np.float64)
                    series.append(samples)
                    # Keep legend entries distinguishable across cases.
                    labels.append(model if len(CASES) == 1 else f"{model} ({case})")
                    _report_normality(samples, model, case, dataset)

            if series:
                # Axes.hist needs exactly one color per series; keep the
                # two-tone palette when it fits, otherwise sample a colormap.
                if len(series) == len(base_colors):
                    colors = base_colors
                else:
                    colors = plt.get_cmap('viridis')(np.linspace(1, 0, len(series)))
                ax.hist(
                    series,
                    density=True,
                    bins=12,
                    alpha=0.6,
                    range=hist_range,
                    histtype='stepfilled',
                    color=colors,
                    label=labels,
                )

            # overlay the standard normal curve
            ax.plot(
                x, y,
                color='darkred',
                linestyle='--',
                linewidth=2,
            )

            ax.set_ylabel('density', fontsize=12, fontweight='bold')
            # only the bottom row carries the x-axis label
            if i == len(methods) - 1:
                ax.set_xlabel('statistics', fontsize=13, fontweight='bold')

    # optional shared legend at the bottom of the figure
    if include_legend:
        handles, legend_labels = axes[0][-1].get_legend_handles_labels()
        fig.legend(
            handles, legend_labels,
            loc='lower center',
            bbox_to_anchor=(0.5, -0.017),
            ncol=5,
        )

    plt.tight_layout()
    plt.subplots_adjust(bottom=0.24)
    # savefig does not create missing directories.
    os.makedirs(plot_path, exist_ok=True)
    plt.savefig(f'{plot_path}/normal_histogram.pdf', dpi=300, bbox_inches='tight')
    plt.show()

def plot_box_by_dataset_and_method(df):
    """Draw one box plot per case for every (method, dataset) pair.

    The figure has one row per method and one column per dataset; inside
    each panel there is one box for every entry of the module-level
    ``CASES``.

    Args:
        df: DataFrame with the columns
          - 'datasets': dataset label of the row
          - 'methods':  method label of the row
          - 'cases':    case label, matched against the entries of ``CASES``
          - 'values':   one numeric sample per row

    Side effects:
        Prints a one-sample KS test against the standard normal for every
        non-empty (dataset, method, case) group, saves the figure to
        ``<plot_path>/normal_boxplot.pdf`` (creating the directory if needed)
        and shows it.
    """
    datasets = sorted(df['datasets'].unique())
    methods = sorted(df['methods'].unique())

    n_rows, n_cols = len(methods), len(datasets)
    # squeeze=False always yields a 2-D axes array, so single-row and
    # single-column layouts need no special casing.
    fig, axes = plt.subplots(
        n_rows, n_cols,
        figsize=(4 * n_cols, 4 * n_rows),
        sharey=False,
        squeeze=False,
    )

    # pick a color for each case
    palette = plt.get_cmap('viridis')(np.linspace(1, 0, len(CASES)))

    for i, method in enumerate(methods):
        for j, dataset in enumerate(datasets):
            ax = axes[i][j]
            sub = df[(df['datasets'] == dataset) & (df['methods'] == method)]

            # one float array of samples per case
            data_list = []
            for n in CASES:
                # Cast explicitly to float64 so scipy and matplotlib receive
                # a numeric array rather than a pandas column of whatever
                # dtype the caller's frame happens to have.
                samples = sub.loc[sub['cases'] == n, 'values'].to_numpy(dtype=np.float64)
                print("KS test for ", n, f"at {dataset} {method}")
                if samples.size:
                    print(stats.kstest(samples, stats.norm.cdf))
                else:
                    # Nothing to test for a group without samples.
                    print("skipped: no samples")
                data_list.append(samples)

            # draw the boxplots at positions 0 .. len(CASES) - 1
            bp = ax.boxplot(
                data_list,
                positions=list(range(len(CASES))),
                patch_artist=True,
                widths=0.6,
                medianprops=dict(color="black")
            )
            # color them
            for patch, color in zip(bp['boxes'], palette):
                patch.set_facecolor(color)
                patch.set_alpha(0.7)

            # x-ticks & labels
            ax.set_xticks(list(range(len(CASES))))
            ax.set_xticklabels([str(n) for n in CASES])
            ax.set_xlabel("cases")
            if j == 0:
                ax.set_ylabel(method, fontweight="bold")
            if i == 0:
                ax.set_title(dataset, fontweight="bold")

    # shared legend at the bottom center of the figure
    handles = [
        plt.Line2D([0], [0], color=palette[k], marker='s', linestyle='', alpha=0.7)
        for k in range(len(CASES))
    ]
    labels = [f"cases={n}" for n in CASES]
    fig.legend(
        handles, labels,
        loc='lower center',
        bbox_to_anchor=(0.5, 0.0),
        ncol=5,
    )

    plt.tight_layout()
    plt.subplots_adjust(bottom=0.2)  # leave extra space at the bottom
    # savefig does not create missing directories.
    os.makedirs(plot_path, exist_ok=True)
    plt.savefig(f'{plot_path}/normal_boxplot.pdf', dpi=300, bbox_inches='tight')
    plt.show()

# Script entry point: load the saved results and draw the histogram figure.
# (plot_box_by_dataset_and_method is not invoked here.)
if __name__ == '__main__':
    df = get_results()
    plot_hist_by_dataset_and_method(df)