ThaoTran7's picture
incomplete commit
485127c
import os
import pandas as pd
import matplotlib.pyplot as plt
from collections import defaultdict
import numpy as np
import json
from scipy import stats
from scipy.stats import norm
CASES = ['exact', 'inexact']
plot_path = 'visualization/normal_results'
include_legend = False
def get_results():
datasets=["xsum", "squad", "writing"]
source_models=["gpt2-xl", "gpt-neo-2.7B"]
methods2 = {
'classification': 'AdaDetectGPT',
}
result_dir_template = 'exp_normal/results_{}'
def get_stats(result_file):
with open(result_file, 'r') as fin:
res = json.load(fin)
return res['predictions']['samples']
def _get_method_stats(dataset, model, method, cases, filter=''):
res_path = result_dir_template.format(cases)
result_file = f'{res_path}/{dataset}_{model}.{method}.json'
if os.path.exists(result_file):
stats = np.array(get_stats(result_file))
else:
stats = np.array([0.0])
return stats
result_list = []
for cases in CASES:
for dataset in datasets:
for model in source_models:
for method in methods2:
results = {'datasets': dataset, 'models': model, 'cases': cases}
method_name = methods2[method]
results['methods'] = method_name
cols = _get_method_stats(dataset, model, method, cases)
results['values'] = cols
# print(f"{method_name} with mean {np.mean(cols)} and std {np.std(cols)} on {dataset} with cases {cases}")
result_list.append(results)
def merge_dicts_of_lists(dataset_list: list[dict]) -> dict:
"""
将一系列 dict(键→list) 合并为一个 dict,
同一个键对应的 list 会被 extend 到一起。
"""
merged = defaultdict(list)
for d in dataset_list:
for key, value in d.items():
# 如果 value 本身是 list,则 extend;否则 append
if isinstance(value, list):
merged[key].extend(value)
else:
merged[key].append(value)
return dict(merged)
result_list = merge_dicts_of_lists(result_list)
df = pd.DataFrame(result_list)
df['values'] = df['values'].apply(lambda arr: arr.tolist())
df = df.explode('values').reset_index(drop=True)
# print(df)
return df
def plot_hist_by_dataset_and_method(df):
datasets = sorted(df['datasets'].unique())
models = sorted(df['models'].unique())
# datasets.remove('xsum')
methods = sorted(df['methods'].unique())
# pick a distinct color for each cases
N = len(CASES)
cmap = plt.get_cmap('viridis')
palette = cmap(np.linspace(1, 0, N))
fig, axes = plt.subplots(
nrows=len(methods),
ncols=len(datasets),
figsize=(3.9*len(datasets), 3.3*len(methods)),
sharey=False,
)
# if only one row or one col, axes may be 1-D
if len(methods) == 1 and len(datasets) == 1:
axes = [[axes]]
elif len(methods) == 1:
axes = [axes]
elif len(datasets) == 1:
axes = [[ax] for ax in axes]
for i, method in enumerate(methods):
for j, dataset in enumerate(datasets):
ax = axes[i][j]
sub = df[(df['methods']==method) & (df['datasets']==dataset)]
value_list = []
model_list = []
for l, model in enumerate(models):
sub2 = sub[sub['models']==model]
# draw one histogram per cases
for k, n in enumerate(CASES):
vals = sub2[sub2['cases']==n]['values']
value_list.append(vals)
model_list.append(model)
print(f"------------ Test on {model} at {dataset} ------------")
test_result = stats.kstest(vals.to_numpy().astype(np.float16), stats.norm.cdf)
print("KS test: ", test_result.pvalue)
test_result = stats.shapiro(vals.to_numpy().astype(np.float16))
print("SW test: ", test_result.pvalue)
test_result = stats.anderson(vals.to_numpy().astype(np.float16), dist='norm')
print("Anderson test: ", test_result.statistic, test_result.critical_values, test_result.significance_level)
print(f"------------------------------------------------------")
_, bins, _ = ax.hist(
value_list,
density=True,
bins=12,
alpha=0.6,
range=(-3, 3),
histtype='stepfilled',
color=['#0571b0', '#f4a582'],
label=model_list,
)
# overlay the standard normal curve
x = np.linspace(bins[0], bins[-1], 200)
y = norm.pdf(x, loc=0, scale=1) # standard normal
ax.plot(
x, y,
color='darkred',
linestyle='--',
linewidth=2,
)
ax.set_ylabel('density', fontsize=12, fontweight='bold')
# titles & labels
if i == 0:
pass
if j == 0:
pass
if i == len(methods)-1:
ax.set_xlabel('statistics', fontsize=13, fontweight='bold')
# common legend on the right
handles, labels = axes[0][-1].get_legend_handles_labels()
if include_legend:
fig.legend(
handles, labels,
# title="cases",
loc='lower center',
bbox_to_anchor=(0.5, -0.017),
ncol=5,
)
plt.tight_layout()
plt.subplots_adjust(bottom=0.24)
plt.savefig(f'{plot_path}/normal_histogram.pdf', dpi=300, bbox_inches='tight')
plt.show()
def plot_box_by_dataset_and_method(df):
"""
df must have columns:
- 'datasets' (categorical)
- 'methods' (categorical)
- 'cases' (numeric, e.g. 2,4,8,16)
- 'values' (each entry is a 1D array or list of numbers)
"""
datasets = sorted(df['datasets'].unique())
methods = sorted(df['methods'].unique())
n_rows, n_cols = len(methods), len(datasets)
fig, axes = plt.subplots(
n_rows, n_cols,
figsize=(4*n_cols, 4*n_rows),
sharey=False
)
# In case of single row/col, normalize axes to 2D list
if n_rows == 1 and n_cols == 1:
axes = [[axes]]
elif n_rows == 1:
axes = [axes]
elif n_cols == 1:
axes = [[ax] for ax in axes]
# pick a color for each cases
N = len(CASES)
cmap = plt.get_cmap('viridis')
palette = cmap(np.linspace(1, 0, N))
for i, method in enumerate(methods):
for j, dataset in enumerate(datasets):
ax = axes[i][j]
sub = df[(df['datasets']==dataset) & (df['methods']==method)]
# build list-of-lists for each prompt
data_list = []
for n in CASES:
block = sub[sub['cases']==n]
print("KS test for ", n, f"at {dataset} {method}")
print(stats.kstest(block['values'], stats.norm.cdf))
data_list.append(block['values'])
# draw the boxplots at positions 0,1,2,3
bp = ax.boxplot(
data_list,
positions=range(len(CASES)),
patch_artist=True,
widths=0.6,
medianprops=dict(color="black")
)
# color them
for patch, color in zip(bp['boxes'], palette):
patch.set_facecolor(color)
patch.set_alpha(0.7)
# x‐ticks & labels
ax.set_xticks(list(range(len(CASES))))
ax.set_xticklabels([str(n) for n in CASES])
ax.set_xlabel("cases")
if j == 0:
ax.set_ylabel(method, fontweight="bold")
if i == 0:
pass
ax.set_title(dataset, fontweight="bold")
# shared legend in upper right
handles = [
plt.Line2D([0],[0], color=palette[k], marker='s', linestyle='', alpha=0.7)
for k in range(len(CASES))
]
labels = [f"cases={n}" for n in CASES]
fig.legend(
handles, labels,
# title="cases",
loc='lower center',
bbox_to_anchor=(0.5, 0.0),
ncol=5,
)
plt.tight_layout()
plt.subplots_adjust(bottom=0.2) # leave extra space at the bottom
plt.savefig(f'{plot_path}/normal_boxplot.pdf', dpi=300, bbox_inches='tight')
plt.show()
if __name__ == '__main__':
df = get_results()
plot_hist_by_dataset_and_method(df)