# NOTE: file-viewer metadata, not part of the module - reported size 21,343 bytes, id 0a6452f (presumably a revision hash).
"""
评估指标模块
Metrics for PAD Predictor Evaluation
该模块包含了PAD预测器的各种评估指标,包括:
- 回归指标:MAE、RMSE、R²
- 置信度评估指标:ECE(Expected Calibration Error)
- 可靠性图表功能
"""
import torch
import torch.nn.functional as F
import numpy as np
from typing import Dict, List, Tuple, Optional, Any
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import logging
class RegressionMetrics:
    """Regression metrics (MAE, RMSE, R², robust R², MAPE) for PAD predictor outputs.

    Each per-dimension metric first reduces over dimension 0 (the sample
    axis) and then aggregates the remaining values according to
    ``reduction``.
    """

    def __init__(self):
        self.logger = logging.getLogger(__name__)

    @staticmethod
    def _reduce(values: torch.Tensor, reduction: str) -> torch.Tensor:
        """Aggregate per-dimension values.

        'mean' and 'sum' collapse to a scalar tensor; any other value
        (documented as 'none') returns ``values`` unchanged.
        """
        if reduction == 'mean':
            return torch.mean(values)
        if reduction == 'sum':
            return torch.sum(values)
        return values

    @staticmethod
    def mae(y_true: torch.Tensor, y_pred: torch.Tensor, reduction: str = 'mean') -> torch.Tensor:
        """
        Mean Absolute Error.

        Args:
            y_true: ground-truth values
            y_pred: predicted values
            reduction: aggregation over output dimensions ('mean', 'sum', 'none')

        Returns:
            MAE tensor (scalar unless reduction is 'none')
        """
        per_dim = torch.mean(torch.abs(y_pred - y_true), dim=0)
        return RegressionMetrics._reduce(per_dim, reduction)

    @staticmethod
    def rmse(y_true: torch.Tensor, y_pred: torch.Tensor, reduction: str = 'mean') -> torch.Tensor:
        """
        Root Mean Square Error.

        Args:
            y_true: ground-truth values
            y_pred: predicted values
            reduction: aggregation over output dimensions ('mean', 'sum', 'none')

        Returns:
            RMSE tensor (scalar unless reduction is 'none')
        """
        per_dim = torch.sqrt(torch.mean((y_pred - y_true) ** 2, dim=0))
        return RegressionMetrics._reduce(per_dim, reduction)

    @staticmethod
    def r2_score(y_true: torch.Tensor, y_pred: torch.Tensor, reduction: str = 'mean') -> torch.Tensor:
        """
        Coefficient of determination (R²), computed per output dimension.

        Args:
            y_true: ground-truth values
            y_pred: predicted values
            reduction: aggregation over output dimensions ('mean', 'sum', 'none')

        Returns:
            R² tensor (scalar unless reduction is 'none')
        """
        # Total sum of squares around the per-dimension mean of the targets.
        ss_tot = torch.sum((y_true - torch.mean(y_true, dim=0)) ** 2, dim=0)
        # Residual sum of squares.
        ss_res = torch.sum((y_true - y_pred) ** 2, dim=0)
        # The epsilon keeps the ratio finite when the targets are constant.
        per_dim = 1 - (ss_res / (ss_tot + 1e-8))
        return RegressionMetrics._reduce(per_dim, reduction)

    @staticmethod
    def robust_r2(y_true: torch.Tensor, y_pred: torch.Tensor) -> torch.Tensor:
        """
        Pooled R² for multi-output regression.

        SS_res and SS_tot are each summed over every sample and every output
        dimension before a single ratio is taken, so the score reflects the
        total variance of all targets:

            R²_robust = 1 - Σ(SS_res_all) / Σ(SS_tot_all)

        Args:
            y_true: ground-truth values, shape (batch_size, output_dim)
            y_pred: predicted values, shape (batch_size, output_dim)

        Returns:
            Scalar tensor holding the pooled R²
        """
        ss_res_total = torch.sum((y_true - y_pred) ** 2)
        # Deviations are still measured against each dimension's own mean.
        ss_tot_total = torch.sum((y_true - torch.mean(y_true, dim=0)) ** 2)
        # The epsilon keeps the ratio finite when the targets are constant.
        return 1 - (ss_res_total / (ss_tot_total + 1e-8))

    @staticmethod
    def mape(y_true: torch.Tensor, y_pred: torch.Tensor, reduction: str = 'mean') -> torch.Tensor:
        """
        Mean Absolute Percentage Error (returned as a fraction, not multiplied by 100).

        Args:
            y_true: ground-truth values
            y_pred: predicted values
            reduction: aggregation over output dimensions ('mean', 'sum', 'none')

        Returns:
            MAPE tensor (scalar unless reduction is 'none')
        """
        # Guard the magnitude of the denominator. Adding an epsilon to the
        # signed target would move negative targets towards zero (and make a
        # target of exactly -1e-8 divide by zero).
        denominator = torch.clamp(torch.abs(y_true), min=1e-8)
        per_dim = torch.mean(torch.abs(y_pred - y_true) / denominator, dim=0)
        return RegressionMetrics._reduce(per_dim, reduction)

    def _component_metrics(self, y_true: torch.Tensor, y_pred: torch.Tensor) -> Dict[str, float]:
        """Return MAE, RMSE, R² and MAPE as Python floats for one output column."""
        return {
            'mae': self.mae(y_true, y_pred).item(),
            'rmse': self.rmse(y_true, y_pred).item(),
            'r2': self.r2_score(y_true, y_pred).item(),
            'mape': self.mape(y_true, y_pred).item(),
        }

    def compute_all_metrics(self,
                            y_true: torch.Tensor,
                            y_pred: torch.Tensor,
                            component_names: Optional[List[str]] = None) -> Dict[str, Dict[str, float]]:
        """
        Compute every regression metric, overall and per component.

        Args:
            y_true: ground-truth values, shape (batch_size, output_dim)
            y_pred: predicted values, shape (batch_size, output_dim)
            component_names: names for the output columns; defaults to the
                three delta-PAD components

        Returns:
            Nested dict with an 'overall' entry (mae, rmse, r2, r2_robust,
            mape) and a 'components' entry keyed by component name.
        """
        if component_names is None:
            # 3-dimensional output (confidence and delta_pressure were removed).
            component_names = ['delta_pad_p', 'delta_pad_a', 'delta_pad_d']

        metrics = {}
        metrics['overall'] = {
            'mae': self.mae(y_true, y_pred).item(),
            'rmse': self.rmse(y_true, y_pred).item(),
            'r2': self.r2_score(y_true, y_pred).item(),
            'r2_robust': self.robust_r2(y_true, y_pred).item(),
            'mape': self.mape(y_true, y_pred).item(),
        }

        # Names beyond the number of available columns are silently skipped.
        n_columns = y_true.size(1)
        metrics['components'] = {
            name: self._component_metrics(y_true[:, i], y_pred[:, i])
            for i, name in enumerate(component_names)
            if i < n_columns
        }
        return metrics

    def print_diagnostic_metrics(self,
                                 y_true: torch.Tensor,
                                 y_pred: torch.Tensor,
                                 component_names: Optional[List[str]] = None) -> None:
        """
        Print a diagnostic table with independent scores for every output dimension.

        Args:
            y_true: ground-truth values, shape (batch_size, output_dim)
            y_pred: predicted values, shape (batch_size, output_dim)
            component_names: row labels; defaults to the three ΔPAD components
        """
        if component_names is None:
            component_names = ['ΔPAD_P', 'ΔPAD_A', 'ΔPAD_D']  # 3-dimensional output

        print("\n" + "="*80)
        print("🔍 诊断模式:各维度独立指标")
        print("="*80)

        # Pooled R² versus the arithmetic mean of the per-dimension R² values.
        r2_robust = self.robust_r2(y_true, y_pred).item()
        r2_mean = self.r2_score(y_true, y_pred).item()
        print(f"\n📊 整体指标:")
        print(f" 稳健 R² (Robust R²): {r2_robust:.6f} ← 所有维度总方差比")
        print(f" 平均 R² (Mean R²) : {r2_mean:.6f} ← 各维度R²的算术平均")
        print(f" 差异 : {r2_robust - r2_mean:+.6f}")

        print(f"\n📐 各维度详细指标:")
        print(f"{'维度':<15} {'R²':<12} {'MAE':<12} {'RMSE':<12} {'MAPE':<12}")
        print("-" * 80)
        n_columns = y_true.size(1)
        for i, name in enumerate(component_names):
            if i >= n_columns:
                continue
            scores = self._component_metrics(y_true[:, i], y_pred[:, i])
            mae, rmse = scores['mae'], scores['rmse']
            r2, mape = scores['r2'], scores['mape']
            # Prefix the R² value with a quality marker.
            r2_str = f"{r2:.6f}"
            if r2 >= 0.8:
                r2_str = f"✅ {r2_str}"
            elif r2 >= 0.5:
                r2_str = f"⚠️ {r2_str}"
            else:
                r2_str = f"❌ {r2_str}"
            print(f"{name:<15} {r2_str:<12} {mae:<12.6f} {rmse:<12.6f} {mape:<12.6f}")
        print("="*80 + "\n")
class CalibrationMetrics:
    """Confidence-calibration metrics: ECE, reliability diagram and sharpness."""

    def __init__(self, n_bins: int = 10):
        """
        Initialise the calibration metrics.

        Args:
            n_bins: number of equal-width confidence bins on [0, 1]
        """
        self.n_bins = n_bins
        self.logger = logging.getLogger(__name__)

    def expected_calibration_error(self,
                                   predictions: torch.Tensor,
                                   targets: torch.Tensor,
                                   confidences: torch.Tensor) -> Tuple[float, List[Dict[str, Any]]]:
        """
        Compute the Expected Calibration Error (ECE).

        The per-sample error is the mean squared error over dimension 1, and
        a bin's "accuracy" is defined as ``1 - mean error``. ECE is the
        sample-weighted mean of ``|confidence - accuracy|`` over the non-empty
        bins, which is the quantity the reliability diagram compares against
        its diagonal.

        Args:
            predictions: predicted values, shape (batch_size, output_dim)
            targets: ground-truth values, same shape as ``predictions``
            confidences: raw confidence scores (a sigmoid is applied here);
                expected as (batch_size, 1) so that they line up with the
                per-sample error column

        Returns:
            Tuple of the ECE value and a list with one dict per non-empty bin
            (bin_lower, bin_upper, count, avg_confidence, avg_error, accuracy).
        """
        # Per-sample mean squared error, kept as a column.
        errors = torch.mean((predictions - targets) ** 2, dim=1, keepdim=True)
        # Map raw confidence scores to [0, 1].
        confidences_norm = torch.sigmoid(confidences)

        bin_boundaries = torch.linspace(0, 1, self.n_bins + 1,
                                        device=confidences_norm.device)
        bin_lowers = bin_boundaries[:-1]
        bin_uppers = bin_boundaries[1:]

        ece = torch.tensor(0.0, device=confidences_norm.device)
        bin_info = []
        for idx, (bin_lower, bin_upper) in enumerate(zip(bin_lowers, bin_uppers)):
            # Bins are (lower, upper]; the first one is closed on the left so
            # that a confidence of exactly 0 is not dropped.
            if idx == 0:
                above_lower = confidences_norm >= bin_lower
            else:
                above_lower = confidences_norm > bin_lower
            in_bin = above_lower & (confidences_norm <= bin_upper)
            prop_in_bin = in_bin.float().mean()
            if prop_in_bin > 0:
                avg_confidence_in_bin = confidences_norm[in_bin].mean()
                avg_error_in_bin = errors[in_bin].mean()
                accuracy_in_bin = 1 - avg_error_in_bin
                # Calibration gap: confidence versus accuracy (not versus the
                # raw error), weighted by the fraction of samples in the bin.
                ece += torch.abs(avg_confidence_in_bin - accuracy_in_bin) * prop_in_bin
                bin_info.append({
                    'bin_lower': bin_lower.item(),
                    'bin_upper': bin_upper.item(),
                    'count': in_bin.sum().item(),
                    'avg_confidence': avg_confidence_in_bin.item(),
                    'avg_error': avg_error_in_bin.item(),
                    'accuracy': accuracy_in_bin.item()
                })
        return ece.item(), bin_info

    def reliability_diagram(self,
                            predictions: torch.Tensor,
                            targets: torch.Tensor,
                            confidences: torch.Tensor,
                            save_path: Optional[str] = None) -> None:
        """
        Draw a reliability diagram (accuracy per confidence bin plus sample counts).

        Args:
            predictions: predicted values
            targets: ground-truth values
            confidences: raw confidence scores
            save_path: if given, the figure is also written to this path
        """
        ece, bin_info = self.expected_calibration_error(predictions, targets, confidences)

        accuracies = [info['accuracy'] for info in bin_info]
        counts = [info['count'] for info in bin_info]
        bin_centers = [(info['bin_lower'] + info['bin_upper']) / 2 for info in bin_info]

        # Explicit axes objects are used throughout: after twinx() the pyplot
        # "current axes" is the twin, so plt.ylabel/plt.ylim/plt.legend would
        # land on the count axis instead of the accuracy axis.
        fig, ax_acc = plt.subplots(figsize=(10, 6))
        ax_acc.plot([0, 1], [0, 1], 'k--', label='Perfect Calibration')
        ax_acc.plot(bin_centers, accuracies, 'bo-', label='Model', linewidth=2, markersize=8)
        ax_acc.set_xlabel('Confidence', fontsize=12)
        ax_acc.set_ylabel('Accuracy', fontsize=12)
        ax_acc.set_title(f'Reliability Diagram (ECE = {ece:.4f})', fontsize=14)
        ax_acc.grid(True, alpha=0.3)
        ax_acc.set_xlim(0, 1)
        ax_acc.set_ylim(0, 1)

        # Secondary axis: sample count per bin, one bar per bin width.
        ax_count = ax_acc.twinx()
        ax_count.bar(bin_centers, counts, width=1.0 / self.n_bins, alpha=0.3,
                     color='gray', label='Sample Count')
        ax_count.set_ylabel('Sample Count', fontsize=12)
        ax_count.set_ylim(0, max(counts) * 1.2 if counts else 1)

        # One legend covering the artists of both axes.
        line_handles, line_labels = ax_acc.get_legend_handles_labels()
        bar_handles, bar_labels = ax_count.get_legend_handles_labels()
        ax_acc.legend(line_handles + bar_handles, line_labels + bar_labels, loc='upper left')

        if save_path:
            fig.savefig(save_path, dpi=300, bbox_inches='tight')
            self.logger.info(f"可靠性图表已保存到: {save_path}")
        plt.show()
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)

    def sharpness(self, confidences: torch.Tensor) -> float:
        """
        Compute the sharpness of the confidences.

        Args:
            confidences: raw confidence scores (a sigmoid is applied here)

        Returns:
            Standard deviation of the sigmoid-normalised confidences
        """
        confidences_norm = torch.sigmoid(confidences)
        return torch.std(confidences_norm).item()
class PADMetrics:
    """PAD-specific evaluation: regression metrics plus direction (cosine / angle) errors."""

    def __init__(self):
        self.regression_metrics = RegressionMetrics()
        self.calibration_metrics = CalibrationMetrics()
        self.logger = logging.getLogger(__name__)

    @staticmethod
    def _as_batch(tensor: torch.Tensor) -> torch.Tensor:
        """Add a leading batch dimension to a 1-D tensor; other tensors pass through."""
        return tensor.unsqueeze(0) if tensor.dim() == 1 else tensor

    @staticmethod
    def _std_or_zero(values: torch.Tensor) -> float:
        """Standard deviation as a float; 0.0 for fewer than two values (where std() is NaN)."""
        return values.std().item() if values.numel() > 1 else 0.0

    def evaluate_predictions(self,
                             predictions: torch.Tensor,
                             targets: torch.Tensor,
                             component_names: Optional[List[str]] = None) -> Dict[str, Any]:
        """
        Evaluate predictions with regression and PAD-specific metrics.

        Args:
            predictions: predicted values, shape (batch_size, output_dim) or (output_dim,)
            targets: ground-truth values, same shape as ``predictions``
            component_names: names of the output columns; defaults to the
                three delta-PAD components

        Returns:
            Dict with 'regression' (nested metrics), 'r2_robust', 'r2_mean'
            and 'pad_specific' (cosine similarity and angle error statistics).
        """
        if component_names is None:
            component_names = ['delta_pad_p', 'delta_pad_a', 'delta_pad_d']  # 3-dimensional output

        # Make sure both tensors carry a batch dimension.
        predictions = self._as_batch(predictions)
        targets = self._as_batch(targets)

        results = {}

        # 1. Regression metrics. compute_all_metrics is declared as
        # (y_true, y_pred, ...), so the targets must be passed as y_true;
        # R² and MAPE are not symmetric in their arguments.
        regression_results = self.regression_metrics.compute_all_metrics(
            y_true=targets, y_pred=predictions, component_names=component_names
        )
        results['regression'] = regression_results
        # Expose both R² flavours at the top level for convenient access.
        results['r2_robust'] = regression_results['overall']['r2_robust']
        results['r2_mean'] = regression_results['overall']['r2']

        # 2. PAD-specific metrics: direction error of the first three columns.
        delta_pad_pred = predictions[:, :3]
        delta_pad_true = targets[:, :3]
        cos_sim = F.cosine_similarity(delta_pad_pred, delta_pad_true, dim=1)
        # Clamp before acos so rounding outside [-1, 1] cannot produce NaN;
        # the result is converted from radians to degrees.
        angle_error = torch.acos(torch.clamp(cos_sim, -1 + 1e-8, 1 - 1e-8)) * 180 / np.pi
        results['pad_specific'] = {
            'cosine_similarity_mean': cos_sim.mean().item(),
            'cosine_similarity_std': self._std_or_zero(cos_sim),
            'angle_error_mean': angle_error.mean().item(),
            'angle_error_std': self._std_or_zero(angle_error)
        }
        return results

    def evaluate_predictions_diagnostic(self,
                                        predictions: torch.Tensor,
                                        targets: torch.Tensor,
                                        component_names: Optional[List[str]] = None) -> Dict[str, Any]:
        """
        Diagnostic evaluation: print the per-dimension table, then return the full results.

        Args:
            predictions: predicted values
            targets: ground-truth values
            component_names: names of the output columns

        Returns:
            The dict produced by ``evaluate_predictions``
        """
        # The diagnostic printer indexes dimension 1, so add the batch
        # dimension before calling it.
        predictions = self._as_batch(predictions)
        targets = self._as_batch(targets)
        # print_diagnostic_metrics is declared as (y_true, y_pred, ...).
        self.regression_metrics.print_diagnostic_metrics(
            y_true=targets, y_pred=predictions, component_names=component_names
        )
        return self.evaluate_predictions(predictions, targets, component_names)

    def generate_evaluation_report(self,
                                   predictions: torch.Tensor,
                                   targets: torch.Tensor,
                                   save_path: Optional[str] = None) -> str:
        """
        Build a plain-text evaluation report.

        Args:
            predictions: predicted values
            targets: ground-truth values
            save_path: if given, the report is also written to this path (UTF-8)

        Returns:
            The report text
        """
        results = self.evaluate_predictions(predictions, targets)

        report = []
        report.append("=" * 60)
        report.append("PAD预测器评估报告")
        report.append("=" * 60)

        # Overall regression metrics.
        report.append("\n1. 整体回归指标:")
        overall = results['regression']['overall']
        report.append(f" MAE: {overall['mae']:.6f}")
        report.append(f" RMSE: {overall['rmse']:.6f}")
        report.append(f" R² (平均): {overall['r2']:.6f}")
        report.append(f" R² (稳健): {overall['r2_robust']:.6f} ← 所有维度总方差比")
        report.append(f" MAPE: {overall['mape']:.6f}")

        # Per-component regression metrics.
        report.append("\n2. 各组件回归指标:")
        components = results['regression']['components']
        for name, metrics in components.items():
            report.append(f" {name}:")
            report.append(f" MAE: {metrics['mae']:.6f}")
            report.append(f" RMSE: {metrics['rmse']:.6f}")
            report.append(f" R²: {metrics['r2']:.6f}")

        # NOTE: there is no calibration section. Confidence is no longer an
        # output dimension (it is computed dynamically via MC Dropout), so it
        # is not part of this report.

        # PAD-specific metrics.
        report.append("\n3. PAD特定指标:")
        pad_specific = results['pad_specific']
        report.append(f" 余弦相似度 (均值±标准差): {pad_specific['cosine_similarity_mean']:.4f} ± {pad_specific['cosine_similarity_std']:.4f}")
        report.append(f" 角度误差 (均值±标准差): {pad_specific['angle_error_mean']:.2f}° ± {pad_specific['angle_error_std']:.2f}°")
        report.append("\n" + "=" * 60)

        report_text = "\n".join(report)

        if save_path:
            with open(save_path, 'w', encoding='utf-8') as f:
                f.write(report_text)
            self.logger.info(f"评估报告已保存到: {save_path}")
        return report_text
def create_metrics(metric_type: str = 'pad', **kwargs) -> Any:
    """
    Factory for the metric helper classes.

    Args:
        metric_type: which helper to build ('regression', 'calibration', 'pad')
        **kwargs: forwarded to ``CalibrationMetrics`` only; ignored for the
            other types

    Returns:
        A new metrics instance

    Raises:
        ValueError: if ``metric_type`` is not one of the supported names
    """
    # Guard-clause form: each supported name returns immediately.
    if metric_type == 'pad':
        return PADMetrics()
    if metric_type == 'calibration':
        return CalibrationMetrics(**kwargs)
    if metric_type == 'regression':
        return RegressionMetrics()
    raise ValueError(f"不支持的指标类型: {metric_type}")
if __name__ == "__main__":
    # Smoke test for the metric classes on random data.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Test data: 3-dimensional delta-PAD outputs plus separate raw confidence
    # scores, since confidence is no longer one of the output columns.
    batch_size = 100
    predictions = torch.randn(batch_size, 3, device=device)
    targets = torch.randn(batch_size, 3, device=device)
    confidence_logits = torch.randn(batch_size, 1, device=device)
    print("测试评估指标:")
    print(f"输入形状: {predictions.shape}")

    # Regression metrics; compute_all_metrics takes (y_true, y_pred).
    regression_metrics = RegressionMetrics()
    regression_results = regression_metrics.compute_all_metrics(targets, predictions)
    print("\n整体回归指标:")
    for key, value in regression_results['overall'].items():
        print(f" {key}: {value:.6f}")

    # Calibration metrics are read from CalibrationMetrics directly;
    # evaluate_predictions does not return a 'calibration' entry.
    calibration_metrics = CalibrationMetrics(n_bins=10)
    ece, bin_info = calibration_metrics.expected_calibration_error(
        predictions, targets, confidence_logits
    )
    print("\n校准指标:")
    print(f" ECE: {ece:.6f}")
    print(f" Sharpness: {calibration_metrics.sharpness(confidence_logits):.6f}")

    # PAD-specific metrics.
    pad_metrics = PADMetrics()
    full_results = pad_metrics.evaluate_predictions(predictions, targets)
    print("\nPAD特定指标:")
    pad_specific = full_results['pad_specific']
    for key, value in pad_specific.items():
        print(f" {key}: {value:.6f}")

    # Full text report.
    report = pad_metrics.generate_evaluation_report(predictions, targets)
    print("\n评估报告:")
    print(report)
    print("\n评估指标测试完成!")