# Chordia/src/models/metrics.py
# (Repo-viewer provenance residue, kept as a comment: Corolin — "first commit" — 0a6452f)
"""
评估指标模块
Metrics for PAD Predictor Evaluation
该模块包含了PAD预测器的各种评估指标,包括:
- 回归指标:MAE、RMSE、R²
- 置信度评估指标:ECE(Expected Calibration Error)
- 可靠性图表功能
"""
import torch
import torch.nn.functional as F
import numpy as np
from typing import Dict, List, Tuple, Optional, Any
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import logging
class RegressionMetrics:
    """Regression metrics (MAE / RMSE / R² / robust R² / MAPE) for multi-output prediction.

    All static metrics accept tensors of shape (batch,) or (batch, dims);
    they first compute a per-dimension value (reducing over the batch axis)
    and then aggregate across dimensions according to ``reduction``.
    """

    def __init__(self):
        self.logger = logging.getLogger(__name__)

    @staticmethod
    def _reduce(per_dim: torch.Tensor, reduction: str) -> torch.Tensor:
        """Aggregate a per-dimension metric: 'mean', 'sum', anything else = no-op."""
        if reduction == 'mean':
            return torch.mean(per_dim)
        if reduction == 'sum':
            return torch.sum(per_dim)
        return per_dim

    @staticmethod
    def mae(y_true: torch.Tensor, y_pred: torch.Tensor, reduction: str = 'mean') -> torch.Tensor:
        """
        Mean Absolute Error.

        Args:
            y_true: Ground-truth values.
            y_pred: Predicted values, same shape as ``y_true``.
            reduction: Aggregation mode ('mean', 'sum', 'none').

        Returns:
            MAE (scalar for 'mean'/'sum', per-dimension tensor otherwise).
        """
        per_dim = torch.mean(torch.abs(y_pred - y_true), dim=0)
        return RegressionMetrics._reduce(per_dim, reduction)

    @staticmethod
    def rmse(y_true: torch.Tensor, y_pred: torch.Tensor, reduction: str = 'mean') -> torch.Tensor:
        """
        Root Mean Square Error.

        Args:
            y_true: Ground-truth values.
            y_pred: Predicted values, same shape as ``y_true``.
            reduction: Aggregation mode ('mean', 'sum', 'none').

        Returns:
            RMSE (scalar for 'mean'/'sum', per-dimension tensor otherwise).
            Note: with 'mean' this averages the per-dimension RMSEs, which is
            not the same as the RMSE over all elements pooled together.
        """
        per_dim = torch.sqrt(torch.mean((y_pred - y_true) ** 2, dim=0))
        return RegressionMetrics._reduce(per_dim, reduction)

    @staticmethod
    def r2_score(y_true: torch.Tensor, y_pred: torch.Tensor, reduction: str = 'mean') -> torch.Tensor:
        """
        Coefficient of determination (R²), computed per output dimension.

        Args:
            y_true: Ground-truth values.
            y_pred: Predicted values, same shape as ``y_true``.
            reduction: Aggregation mode ('mean', 'sum', 'none').

        Returns:
            R² (scalar for 'mean'/'sum', per-dimension tensor otherwise).
        """
        # Total sum of squares around the per-dimension mean of the targets.
        ss_tot = torch.sum((y_true - torch.mean(y_true, dim=0)) ** 2, dim=0)
        # Residual sum of squares.
        ss_res = torch.sum((y_true - y_pred) ** 2, dim=0)
        # Epsilon guards against a zero-variance target dimension.
        per_dim = 1 - (ss_res / (ss_tot + 1e-8))
        return RegressionMetrics._reduce(per_dim, reduction)

    @staticmethod
    def robust_r2(y_true: torch.Tensor, y_pred: torch.Tensor) -> torch.Tensor:
        """
        Robust R² for multi-output regression.

        Pools SS_res and SS_tot over ALL dimensions and samples before taking
        the ratio, so high-variance targets weigh in proportionally instead of
        each dimension's R² counting equally:

            R²_robust = 1 - Σ(SS_res_all) / Σ(SS_tot_all)

        Args:
            y_true: Ground-truth values, shape (batch_size, output_dim).
            y_pred: Predicted values, shape (batch_size, output_dim).

        Returns:
            Scalar robust R².
        """
        ss_res_total = torch.sum((y_true - y_pred) ** 2)
        ss_tot_total = torch.sum((y_true - torch.mean(y_true, dim=0)) ** 2)
        # Epsilon guards against zero total variance.
        return 1 - (ss_res_total / (ss_tot_total + 1e-8))

    @staticmethod
    def mape(y_true: torch.Tensor, y_pred: torch.Tensor, reduction: str = 'mean') -> torch.Tensor:
        """
        Mean Absolute Percentage Error.

        BUGFIX: the denominator is now ``|y_true| + eps`` instead of
        ``y_true + eps``. The old form cancels the epsilon (and can divide by
        ~zero) for small negative targets; PAD deltas can be negative, so the
        absolute-value form is required for a well-defined percentage.

        Args:
            y_true: Ground-truth values.
            y_pred: Predicted values, same shape as ``y_true``.
            reduction: Aggregation mode ('mean', 'sum', 'none').

        Returns:
            MAPE as a fraction (scalar for 'mean'/'sum', per-dimension otherwise).
        """
        per_dim = torch.mean(
            torch.abs(y_pred - y_true) / (torch.abs(y_true) + 1e-8), dim=0
        )
        return RegressionMetrics._reduce(per_dim, reduction)

    def compute_all_metrics(self,
                            y_true: torch.Tensor,
                            y_pred: torch.Tensor,
                            component_names: Optional[List[str]] = None) -> Dict[str, Dict[str, float]]:
        """
        Compute every regression metric, overall and per component.

        Args:
            y_true: Ground-truth values, shape (batch_size, output_dim).
            y_pred: Predicted values, shape (batch_size, output_dim).
            component_names: Names for the output dimensions; defaults to the
                3-dim PAD deltas (confidence / delta_pressure were removed
                from the model output).

        Returns:
            Nested dict: ``{'overall': {...}, 'components': {name: {...}}}``.
        """
        if component_names is None:
            component_names = ['delta_pad_p', 'delta_pad_a', 'delta_pad_d']

        metrics: Dict[str, Dict[str, float]] = {}
        metrics['overall'] = {
            'mae': self.mae(y_true, y_pred).item(),
            'rmse': self.rmse(y_true, y_pred).item(),
            'r2': self.r2_score(y_true, y_pred).item(),
            'r2_robust': self.robust_r2(y_true, y_pred).item(),
            'mape': self.mape(y_true, y_pred).item()
        }

        # Per-dimension breakdown; extra names beyond the tensor width are ignored.
        component_metrics = {}
        for i, name in enumerate(component_names):
            if i < y_true.size(1):
                component_metrics[name] = {
                    'mae': self.mae(y_true[:, i], y_pred[:, i]).item(),
                    'rmse': self.rmse(y_true[:, i], y_pred[:, i]).item(),
                    'r2': self.r2_score(y_true[:, i], y_pred[:, i]).item(),
                    'mape': self.mape(y_true[:, i], y_pred[:, i]).item()
                }
        metrics['components'] = component_metrics
        return metrics

    @staticmethod
    def _format_r2(r2: float) -> str:
        """Tag an R² value with a quality marker: ✅ ≥ 0.8, ⚠️ ≥ 0.5, ❌ below."""
        tag = "✅" if r2 >= 0.8 else ("⚠️" if r2 >= 0.5 else "❌")
        return f"{tag} {r2:.6f}"

    def print_diagnostic_metrics(self,
                                 y_true: torch.Tensor,
                                 y_pred: torch.Tensor,
                                 component_names: Optional[List[str]] = None) -> None:
        """
        Print detailed diagnostic metrics (independent score per dimension).

        Args:
            y_true: Ground-truth values, shape (batch_size, output_dim).
            y_pred: Predicted values, shape (batch_size, output_dim).
            component_names: Display names for the output dimensions.
        """
        if component_names is None:
            component_names = ['ΔPAD_P', 'ΔPAD_A', 'ΔPAD_D']

        print("\n" + "="*80)
        print("🔍 诊断模式:各维度独立指标")
        print("="*80)

        # Robust vs. mean R² — a large gap indicates uneven per-dim variance.
        r2_robust = self.robust_r2(y_true, y_pred).item()
        r2_mean = self.r2_score(y_true, y_pred).item()
        print(f"\n📊 整体指标:")
        print(f" 稳健 R² (Robust R²): {r2_robust:.6f} ← 所有维度总方差比")
        print(f" 平均 R² (Mean R²) : {r2_mean:.6f} ← 各维度R²的算术平均")
        print(f" 差异 : {r2_robust - r2_mean:+.6f}")

        print(f"\n📐 各维度详细指标:")
        print(f"{'维度':<15} {'R²':<12} {'MAE':<12} {'RMSE':<12} {'MAPE':<12}")
        print("-" * 80)
        for i, name in enumerate(component_names):
            if i < y_true.size(1):
                mae = self.mae(y_true[:, i], y_pred[:, i]).item()
                rmse = self.rmse(y_true[:, i], y_pred[:, i]).item()
                r2 = self.r2_score(y_true[:, i], y_pred[:, i]).item()
                mape = self.mape(y_true[:, i], y_pred[:, i]).item()
                r2_str = self._format_r2(r2)
                print(f"{name:<15} {r2_str:<12} {mae:<12.6f} {rmse:<12.6f} {mape:<12.6f}")
        print("="*80 + "\n")
class CalibrationMetrics:
    """Confidence-calibration metrics: ECE, reliability diagram, sharpness."""

    def __init__(self, n_bins: int = 10):
        """
        Initialize the calibration metrics.

        Args:
            n_bins: Number of equal-width confidence bins over [0, 1].
        """
        self.n_bins = n_bins
        self.logger = logging.getLogger(__name__)

    def expected_calibration_error(self,
                                   predictions: torch.Tensor,
                                   targets: torch.Tensor,
                                   confidences: torch.Tensor) -> Tuple[float, List[Dict[str, float]]]:
        """
        Compute the Expected Calibration Error (ECE).

        Args:
            predictions: Predicted values, shape (batch_size, D).
                NOTE(review): docstring originally said D=4; the rest of this
                module now uses 3-dim PAD outputs — any trailing dim works here.
            targets: Ground-truth values, same shape as predictions.
            confidences: Raw confidence scores, shape (batch_size, 1);
                sigmoid-squashed below, so they are treated as logits.

        Returns:
            The scalar ECE value and a list of per-bin statistics dicts
            (only non-empty bins are included).
        """
        # Per-sample prediction error: MSE across the output dimensions.
        errors = torch.mean((predictions - targets) ** 2, dim=1, keepdim=True)
        # Map raw confidences into [0, 1].
        confidences_norm = torch.sigmoid(confidences)
        # Equal-width bin edges over [0, 1].
        bin_boundaries = torch.linspace(0, 1, self.n_bins + 1)
        bin_lowers = bin_boundaries[:-1]
        bin_uppers = bin_boundaries[1:]
        ece = torch.tensor(0.0, device=confidences_norm.device)
        bin_info = []
        for bin_lower, bin_upper in zip(bin_lowers, bin_uppers):
            # Samples whose normalized confidence falls in (lower, upper].
            # The open lower edge of the first bin cannot drop samples
            # because sigmoid never outputs exactly 0.
            in_bin = (confidences_norm > bin_lower) & (confidences_norm <= bin_upper)
            prop_in_bin = in_bin.float().mean()
            if prop_in_bin > 0:
                # Mean confidence and mean error within this bin.
                avg_confidence_in_bin = confidences_norm[in_bin].mean()
                avg_error_in_bin = errors[in_bin].mean()
                # ECE contribution, weighted by the bin's share of samples.
                # NOTE(review): this compares a sigmoid confidence directly
                # against a mean-squared error — the two are not on the same
                # scale; confirm this is the intended calibration definition.
                ece += torch.abs(avg_confidence_in_bin - avg_error_in_bin) * prop_in_bin
                bin_info.append({
                    'bin_lower': bin_lower.item(),
                    'bin_upper': bin_upper.item(),
                    'count': in_bin.sum().item(),
                    'avg_confidence': avg_confidence_in_bin.item(),
                    'avg_error': avg_error_in_bin.item(),
                    # "accuracy" here is simply 1 - mean MSE of the bin.
                    'accuracy': (1 - avg_error_in_bin).item()
                })
        return ece.item(), bin_info

    def reliability_diagram(self,
                            predictions: torch.Tensor,
                            targets: torch.Tensor,
                            confidences: torch.Tensor,
                            save_path: Optional[str] = None) -> None:
        """
        Plot a reliability diagram (per-bin confidence vs. "accuracy").

        Args:
            predictions: Predicted values.
            targets: Ground-truth values.
            confidences: Raw confidence scores (logits).
            save_path: Optional path to save the figure; the figure is
                displayed via plt.show() either way.
        """
        ece, bin_info = self.expected_calibration_error(predictions, targets, confidences)
        # Unpack per-bin statistics (avg_confidences is extracted but unused
        # in the plot below — kept as-is).
        bin_lowers = [info['bin_lower'] for info in bin_info]
        bin_uppers = [info['bin_upper'] for info in bin_info]
        avg_confidences = [info['avg_confidence'] for info in bin_info]
        accuracies = [info['accuracy'] for info in bin_info]
        counts = [info['count'] for info in bin_info]
        # Bin midpoints for the x axis.
        bin_centers = [(lower + upper) / 2 for lower, upper in zip(bin_lowers, bin_uppers)]
        # Create the figure.
        plt.figure(figsize=(10, 6))
        # Diagonal = perfect calibration; blue line = the model.
        plt.plot([0, 1], [0, 1], 'k--', label='Perfect Calibration')
        plt.plot(bin_centers, accuracies, 'bo-', label='Model', linewidth=2, markersize=8)
        # Secondary axis: per-bin sample counts as translucent bars.
        ax2 = plt.gca().twinx()
        ax2.bar(bin_centers, counts, width=0.1, alpha=0.3, color='gray', label='Sample Count')
        ax2.set_ylabel('Sample Count', fontsize=12)
        ax2.set_ylim(0, max(counts) * 1.2 if counts else 1)
        # Axis labels, title, legend.
        plt.xlabel('Confidence', fontsize=12)
        plt.ylabel('Accuracy', fontsize=12)
        plt.title(f'Reliability Diagram (ECE = {ece:.4f})', fontsize=14)
        plt.legend(loc='upper left')
        plt.grid(True, alpha=0.3)
        plt.xlim(0, 1)
        plt.ylim(0, 1)
        # Persist to disk if requested, then display.
        if save_path:
            plt.savefig(save_path, dpi=300, bbox_inches='tight')
            self.logger.info(f"可靠性图表已保存到: {save_path}")
        plt.show()

    def sharpness(self, confidences: torch.Tensor) -> float:
        """
        Sharpness of the confidence distribution.

        Args:
            confidences: Raw confidence scores (logits); sigmoid-normalized first.

        Returns:
            Standard deviation of the normalized confidences (higher = sharper,
            i.e. more spread-out confidence values).
        """
        confidences_norm = torch.sigmoid(confidences)
        return torch.std(confidences_norm).item()
class PADMetrics:
    """Composite evaluation for PAD-delta predictors.

    Bundles the generic regression metrics with PAD-specific direction
    metrics (cosine similarity / angular error of the 3-dim ΔPAD vector)
    and a plain-text report generator.
    """

    def __init__(self):
        self.regression_metrics = RegressionMetrics()
        self.calibration_metrics = CalibrationMetrics()
        self.logger = logging.getLogger(__name__)

    def evaluate_predictions(self,
                             predictions: torch.Tensor,
                             targets: torch.Tensor,
                             component_names: List[str] = None) -> Dict[str, Any]:
        """
        Evaluate predictions against targets on every available metric.

        Args:
            predictions: Predicted values, shape (batch_size, 3) or (3,).
            targets: Ground-truth values, shape (batch_size, 3) or (3,).
            component_names: Names for the output dimensions; defaults to
                the three PAD-delta components.

        Returns:
            Dict with 'regression' (nested metrics), 'r2_robust' / 'r2_mean'
            convenience scalars, and 'pad_specific' direction metrics.
        """
        if component_names is None:
            # 3-dim output (confidence / delta_pressure were removed upstream).
            component_names = ['delta_pad_p', 'delta_pad_a', 'delta_pad_d']

        # Promote 1-D inputs to a batch of one.
        preds = predictions.unsqueeze(0) if predictions.dim() == 1 else predictions
        tgts = targets.unsqueeze(0) if targets.dim() == 1 else targets

        # 1. Regression metrics, with the two R² flavors surfaced at top level.
        regression_results = self.regression_metrics.compute_all_metrics(
            preds, tgts, component_names
        )
        results: Dict[str, Any] = {
            'regression': regression_results,
            'r2_robust': regression_results['overall']['r2_robust'],
            'r2_mean': regression_results['overall']['r2'],
        }

        # 2. PAD-specific direction metrics on the first three dimensions:
        # cosine similarity and the corresponding angular error in degrees.
        pad_pred = preds[:, :3]
        pad_true = tgts[:, :3]
        cos_sim = F.cosine_similarity(pad_pred, pad_true, dim=1)
        # Clamp keeps acos in its domain despite float round-off.
        angle_deg = torch.acos(torch.clamp(cos_sim, -1 + 1e-8, 1 - 1e-8)) * 180 / np.pi
        results['pad_specific'] = {
            'cosine_similarity_mean': cos_sim.mean().item(),
            'cosine_similarity_std': cos_sim.std().item(),
            'angle_error_mean': angle_deg.mean().item(),
            'angle_error_std': angle_deg.std().item()
        }
        return results

    def evaluate_predictions_diagnostic(self,
                                        predictions: torch.Tensor,
                                        targets: torch.Tensor,
                                        component_names: List[str] = None) -> Dict[str, Any]:
        """
        Diagnostic-mode evaluation: print the per-dimension table, then
        return the full results dict.

        Args:
            predictions: Predicted values.
            targets: Ground-truth values.
            component_names: Names for the output dimensions.

        Returns:
            Same dict as evaluate_predictions().
        """
        # Print the per-dimension diagnostic table first, then evaluate.
        self.regression_metrics.print_diagnostic_metrics(predictions, targets, component_names)
        return self.evaluate_predictions(predictions, targets, component_names)

    def generate_evaluation_report(self,
                                   predictions: torch.Tensor,
                                   targets: torch.Tensor,
                                   save_path: Optional[str] = None) -> str:
        """
        Build a plain-text evaluation report (optionally saved to disk).

        Args:
            predictions: Predicted values.
            targets: Ground-truth values.
            save_path: Optional path; the report is written there as UTF-8.

        Returns:
            The report text.
        """
        results = self.evaluate_predictions(predictions, targets)

        lines: List[str] = ["=" * 60, "PAD预测器评估报告", "=" * 60]

        # Section 1: overall regression metrics.
        overall = results['regression']['overall']
        lines.append("\n1. 整体回归指标:")
        lines.append(f" MAE: {overall['mae']:.6f}")
        lines.append(f" RMSE: {overall['rmse']:.6f}")
        lines.append(f" R² (平均): {overall['r2']:.6f}")
        lines.append(f" R² (稳健): {overall['r2_robust']:.6f} ← 所有维度总方差比")
        lines.append(f" MAPE: {overall['mape']:.6f}")

        # Section 2: per-component regression metrics.
        lines.append("\n2. 各组件回归指标:")
        for name, metrics in results['regression']['components'].items():
            lines.append(f" {name}:")
            lines.append(f" MAE: {metrics['mae']:.6f}")
            lines.append(f" RMSE: {metrics['rmse']:.6f}")
            lines.append(f" R²: {metrics['r2']:.6f}")

        # Calibration section intentionally omitted: confidence is now derived
        # via MC Dropout and is no longer a model output dimension.

        # Section 3: PAD-specific direction metrics.
        pad_specific = results['pad_specific']
        lines.append("\n3. PAD特定指标:")
        lines.append(f" 余弦相似度 (均值±标准差): {pad_specific['cosine_similarity_mean']:.4f} ± {pad_specific['cosine_similarity_std']:.4f}")
        lines.append(f" 角度误差 (均值±标准差): {pad_specific['angle_error_mean']:.2f}° ± {pad_specific['angle_error_std']:.2f}°")
        lines.append("\n" + "=" * 60)

        report_text = "\n".join(lines)
        if save_path:
            with open(save_path, 'w', encoding='utf-8') as f:
                f.write(report_text)
            self.logger.info(f"评估报告已保存到: {save_path}")
        return report_text
def create_metrics(metric_type: str = 'pad', **kwargs) -> Any:
    """
    Factory for evaluation-metric objects.

    Args:
        metric_type: One of 'regression', 'calibration', or 'pad'.
        **kwargs: Extra constructor arguments (consumed only by 'calibration').

    Returns:
        A metrics instance of the requested kind.

    Raises:
        ValueError: If ``metric_type`` is not a recognized kind.
    """
    builders = {
        'regression': lambda: RegressionMetrics(),
        'calibration': lambda: CalibrationMetrics(**kwargs),
        'pad': lambda: PADMetrics(),
    }
    try:
        builder = builders[metric_type]
    except KeyError:
        raise ValueError(f"不支持的指标类型: {metric_type}") from None
    return builder()
if __name__ == "__main__":
    # Smoke test for the metrics module.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Synthetic 3-dim PAD deltas. BUGFIX: the old demo used 5-dim tensors and
    # sliced a confidence column out of predictions[:, 4:5], which no longer
    # matches the 3-dim model output this module evaluates.
    batch_size = 100
    predictions = torch.randn(batch_size, 3).to(device)
    targets = torch.randn(batch_size, 3).to(device)

    print("测试评估指标:")
    print(f"输入形状: {predictions.shape}")

    # Regression metrics.
    regression_metrics = RegressionMetrics()
    regression_results = regression_metrics.compute_all_metrics(predictions, targets)
    print(f"\n整体回归指标:")
    for key, value in regression_results['overall'].items():
        print(f" {key}: {value:.6f}")

    # Calibration metrics: confidence is produced separately (e.g. via
    # MC Dropout), so feed a synthetic confidence column instead of slicing
    # it out of the prediction tensor.
    calibration_metrics = CalibrationMetrics(n_bins=10)
    pred_confidence = torch.randn(batch_size, 1).to(device)
    ece, bin_info = calibration_metrics.expected_calibration_error(
        predictions, targets, pred_confidence
    )
    print(f"\nECE: {ece:.6f}")
    print(f"Sharpness: {calibration_metrics.sharpness(pred_confidence):.6f}")

    # PAD-specific metrics. BUGFIX: evaluate_predictions() no longer returns a
    # 'calibration' entry, so the old full_results['calibration'] access
    # raised KeyError; only 'pad_specific' is reported here.
    pad_metrics = PADMetrics()
    full_results = pad_metrics.evaluate_predictions(predictions, targets)
    print(f"\nPAD特定指标:")
    pad_specific = full_results['pad_specific']
    for key, value in pad_specific.items():
        print(f" {key}: {value:.6f}")

    # Full text report.
    report = pad_metrics.generate_evaluation_report(predictions, targets)
    print(f"\n评估报告:")
    print(report)
    print("\n评估指标测试完成!")