learn/test_time_scaling/plot_reward_perplexity.py

Upload folder using huggingface_hub

a80f6e6 verified 10 months ago

1.48 kB

	import json
	import numpy as np
	import matplotlib.pyplot as plt
	from sklearn.linear_model import LinearRegression
	import os
	import glob

	# Plot standardized step rewards against inverse perplexity for every
	# reward_perplexity_stats.json found under the current directory (any depth).
	# For each file: compute Pearson's R, fit a least-squares line, save a scatter
	# plot next to the JSON file, and print the fit parameters.
	stat_files = glob.glob(os.path.join('.', '**', 'reward_perplexity_stats.json'), recursive=True)

	for stat_file in stat_files:
	    with open(stat_file, encoding="utf-8") as f:
	        stats = json.load(f)

	    rewards = np.asarray(stats["step_rewards"], dtype=float)
	    perplexities = np.asarray(stats["step_perplexities"], dtype=float)

	    # Every reward needs a matching perplexity; an empty or mismatched file
	    # cannot be plotted or fitted, so report it and move on to the next one.
	    if rewards.size == 0 or rewards.shape != perplexities.shape:
	        print(f"{stat_file}: skipped (empty or mismatched step_rewards/step_perplexities)")
	        continue

	    # Standardize rewards to zero mean / unit variance. When every reward is
	    # identical the std is 0; dividing would fill the array with NaN and make
	    # the regression below raise, so in that case only center the values.
	    rewards = rewards - rewards.mean()
	    reward_std = rewards.std()
	    if reward_std > 0:
	        rewards = rewards / reward_std

	    # A perplexity of 0 yields inf here; silence the NumPy warning and detect
	    # the bad value explicitly instead.
	    with np.errstate(divide="ignore", invalid="ignore"):
	        inv_ppl = 1 / perplexities
	    if not (np.isfinite(inv_ppl).all() and np.isfinite(rewards).all()):
	        print(f"{stat_file}: skipped (non-finite reward or inverse perplexity)")
	        continue

	    # Pearson correlation coefficient R; undefined (NaN) when either series
	    # is constant or there is only a single sample.
	    if inv_ppl.size > 1 and inv_ppl.std() > 0 and reward_std > 0:
	        R = np.corrcoef(inv_ppl, rewards)[0, 1]
	    else:
	        R = float('nan')

	    # Fit the linear relationship before opening a figure, so a failed fit
	    # does not leave an unclosed figure behind.
	    X = inv_ppl.reshape(-1, 1)
	    reg = LinearRegression().fit(X, rewards)
	    y_pred = reg.predict(X)

	    # Scatter plot of the raw points.
	    plt.figure(figsize=(8, 6))
	    plt.scatter(inv_ppl, rewards, alpha=0.2)
	    plt.xlabel("1 / Perplexity")
	    # The y values are standardized above, so they are no longer 0/1.
	    plt.ylabel("Reward (standardized)")
	    plt.title(f"Reward vs. Inverse Perplexity\n{stat_file}\nR={R:.3f}")

	    # Draw the fitted line left-to-right rather than in sample order.
	    order = np.argsort(inv_ppl)
	    plt.plot(inv_ppl[order], y_pred[order], color='red', label='Linear fit')
	    plt.legend()
	    plt.tight_layout()

	    # Swap only the file extension; str.replace('.json', ...) would also
	    # rewrite any directory name that happens to contain ".json".
	    out_path = os.path.splitext(stat_file)[0] + '_reward_vs_invperplexity.png'
	    plt.savefig(out_path)
	    plt.close()

	    print(f"{stat_file}: Linear fit coef: {reg.coef_}, intercept: {reg.intercept_}, R: {R:.3f}")