File size: 21,343 Bytes
0a6452f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
"""
评估指标模块
Metrics for PAD Predictor Evaluation

该模块包含了PAD预测器的各种评估指标,包括:
- 回归指标:MAE、RMSE、R²
- 置信度评估指标:ECE(Expected Calibration Error)
- 可靠性图表功能
"""

import torch
import torch.nn.functional as F
import numpy as np
from typing import Dict, List, Tuple, Optional, Any
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import logging


class RegressionMetrics:
    """Regression evaluation metrics: MAE, RMSE, R², robust R² and MAPE.

    All metrics take torch tensors shaped (batch_size, output_dim) — a 1-D
    tensor is treated as a single output dimension — reduce over the batch
    axis first, then aggregate across output dimensions per ``reduction``.
    """

    def __init__(self):
        self.logger = logging.getLogger(__name__)

    @staticmethod
    def mae(y_true: torch.Tensor, y_pred: torch.Tensor, reduction: str = 'mean') -> torch.Tensor:
        """
        Mean Absolute Error.

        Args:
            y_true: ground-truth values
            y_pred: predicted values
            reduction: aggregation over output dims ('mean', 'sum', 'none')

        Returns:
            MAE (scalar for 'mean'/'sum', per-dimension tensor otherwise)
        """
        # Per-dimension MAE, reduced over the batch axis.
        mae = torch.mean(torch.abs(y_pred - y_true), dim=0)

        if reduction == 'mean':
            return torch.mean(mae)
        elif reduction == 'sum':
            return torch.sum(mae)
        else:
            return mae

    @staticmethod
    def rmse(y_true: torch.Tensor, y_pred: torch.Tensor, reduction: str = 'mean') -> torch.Tensor:
        """
        Root Mean Square Error.

        Args:
            y_true: ground-truth values
            y_pred: predicted values
            reduction: aggregation over output dims ('mean', 'sum', 'none')

        Returns:
            RMSE (scalar for 'mean'/'sum', per-dimension tensor otherwise)
        """
        # Per-dimension RMSE; note 'mean' averages the per-dimension RMSEs
        # (not the square root of the pooled MSE).
        mse = torch.mean((y_pred - y_true) ** 2, dim=0)
        rmse = torch.sqrt(mse)

        if reduction == 'mean':
            return torch.mean(rmse)
        elif reduction == 'sum':
            return torch.sum(rmse)
        else:
            return rmse

    @staticmethod
    def r2_score(y_true: torch.Tensor, y_pred: torch.Tensor, reduction: str = 'mean') -> torch.Tensor:
        """
        Coefficient of determination (R²), computed per output dimension.

        Args:
            y_true: ground-truth values
            y_pred: predicted values
            reduction: aggregation over output dims ('mean', 'sum', 'none')

        Returns:
            R² (scalar for 'mean'/'sum', per-dimension tensor otherwise)
        """
        # Total sum of squares around the per-dimension mean.
        ss_tot = torch.sum((y_true - torch.mean(y_true, dim=0)) ** 2, dim=0)

        # Residual sum of squares.
        ss_res = torch.sum((y_true - y_pred) ** 2, dim=0)

        # Epsilon guards against a zero-variance target dimension.
        r2 = 1 - (ss_res / (ss_tot + 1e-8))

        if reduction == 'mean':
            return torch.mean(r2)
        elif reduction == 'sum':
            return torch.sum(r2)
        else:
            return r2

    @staticmethod
    def robust_r2(y_true: torch.Tensor, y_pred: torch.Tensor) -> torch.Tensor:
        """
        Robust R² for multi-output regression.

        Pools SS_res and SS_tot over ALL dimensions before taking the ratio,
        so high-variance dimensions dominate instead of each dimension's R²
        being weighted equally:

            R²_robust = 1 - Σ(SS_res_all) / Σ(SS_tot_all)

        Args:
            y_true: ground-truth values, shape (batch_size, output_dim)
            y_pred: predicted values, shape (batch_size, output_dim)

        Returns:
            Robust R² (scalar tensor)
        """
        # Residual sum of squares pooled over all samples and dimensions.
        ss_res_total = torch.sum((y_true - y_pred) ** 2)

        # Total sum of squares pooled the same way (per-dimension means).
        ss_tot_total = torch.sum((y_true - torch.mean(y_true, dim=0)) ** 2)

        # Epsilon guards against a constant target.
        r2_robust = 1 - (ss_res_total / (ss_tot_total + 1e-8))

        return r2_robust

    @staticmethod
    def mape(y_true: torch.Tensor, y_pred: torch.Tensor, reduction: str = 'mean') -> torch.Tensor:
        """
        Mean Absolute Percentage Error.

        Args:
            y_true: ground-truth values
            y_pred: predicted values
            reduction: aggregation over output dims ('mean', 'sum', 'none')

        Returns:
            MAPE (scalar for 'mean'/'sum', per-dimension tensor otherwise)
        """
        # BUG FIX: the epsilon must be added to |y_true|, not to y_true
        # itself — with `y_true + 1e-8` any target at or near -1e-8 produced
        # a zero/near-zero denominator and the metric blew up to inf for
        # small negative targets.
        mape = torch.mean(torch.abs((y_pred - y_true) / (torch.abs(y_true) + 1e-8)), dim=0)

        if reduction == 'mean':
            return torch.mean(mape)
        elif reduction == 'sum':
            return torch.sum(mape)
        else:
            return mape

    def compute_all_metrics(self,
                          y_true: torch.Tensor,
                          y_pred: torch.Tensor,
                          component_names: Optional[List[str]] = None) -> Dict[str, Dict[str, float]]:
        """
        Compute every regression metric, overall and per component.

        Args:
            y_true: ground-truth values, shape (batch_size, output_dim)
            y_pred: predicted values, shape (batch_size, output_dim)
            component_names: per-dimension names; defaults to the 3 PAD deltas
                (confidence and delta_pressure were removed from the output)

        Returns:
            Nested dict: {'overall': {...}, 'components': {name: {...}}}
        """
        if component_names is None:
            component_names = ['delta_pad_p', 'delta_pad_a', 'delta_pad_d']

        metrics = {}

        # Overall metrics across all dimensions.
        metrics['overall'] = {
            'mae': self.mae(y_true, y_pred).item(),
            'rmse': self.rmse(y_true, y_pred).item(),
            'r2': self.r2_score(y_true, y_pred).item(),
            'r2_robust': self.robust_r2(y_true, y_pred).item(),
            'mape': self.mape(y_true, y_pred).item()
        }

        # Per-component metrics; names beyond the tensor width are ignored.
        component_metrics = {}
        for i, name in enumerate(component_names):
            if i < y_true.size(1):
                component_metrics[name] = {
                    'mae': self.mae(y_true[:, i], y_pred[:, i]).item(),
                    'rmse': self.rmse(y_true[:, i], y_pred[:, i]).item(),
                    'r2': self.r2_score(y_true[:, i], y_pred[:, i]).item(),
                    'mape': self.mape(y_true[:, i], y_pred[:, i]).item()
                }

        metrics['components'] = component_metrics

        return metrics

    def print_diagnostic_metrics(self,
                                y_true: torch.Tensor,
                                y_pred: torch.Tensor,
                                component_names: Optional[List[str]] = None) -> None:
        """
        Print a diagnostic table with independent per-dimension scores.

        Args:
            y_true: ground-truth values, shape (batch_size, output_dim)
            y_pred: predicted values, shape (batch_size, output_dim)
            component_names: per-dimension names; defaults to the 3 PAD deltas
        """
        if component_names is None:
            component_names = ['ΔPAD_P', 'ΔPAD_A', 'ΔPAD_D']

        print("\n" + "="*80)
        print("🔍 诊断模式:各维度独立指标")
        print("="*80)

        # Both R² flavours, to expose disagreement between them.
        r2_robust = self.robust_r2(y_true, y_pred).item()
        r2_mean = self.r2_score(y_true, y_pred).item()

        print(f"\n📊 整体指标:")
        print(f"  稳健 R² (Robust R²): {r2_robust:.6f}  ← 所有维度总方差比")
        print(f"  平均 R² (Mean R²)  : {r2_mean:.6f}  ← 各维度R²的算术平均")
        print(f"  差异               : {r2_robust - r2_mean:+.6f}")

        print(f"\n📐 各维度详细指标:")
        print(f"{'维度':<15} {'R²':<12} {'MAE':<12} {'RMSE':<12} {'MAPE':<12}")
        print("-" * 80)

        for i, name in enumerate(component_names):
            if i < y_true.size(1):
                mae = self.mae(y_true[:, i], y_pred[:, i]).item()
                rmse = self.rmse(y_true[:, i], y_pred[:, i]).item()
                r2 = self.r2_score(y_true[:, i], y_pred[:, i]).item()
                mape = self.mape(y_true[:, i], y_pred[:, i]).item()

                # Traffic-light marker on the R² value.
                r2_str = f"{r2:.6f}"
                if r2 >= 0.8:
                    r2_str = f"✅ {r2_str}"
                elif r2 >= 0.5:
                    r2_str = f"⚠️  {r2_str}"
                else:
                    r2_str = f"❌ {r2_str}"

                print(f"{name:<15} {r2_str:<12} {mae:<12.6f} {rmse:<12.6f} {mape:<12.6f}")

        print("="*80 + "\n")


class CalibrationMetrics:
    """Confidence-calibration metrics: ECE, reliability diagram, sharpness."""

    def __init__(self, n_bins: int = 10):
        """
        Initialize the calibration metrics.

        Args:
            n_bins: number of equal-width confidence bins used for ECE
        """
        self.n_bins = n_bins
        self.logger = logging.getLogger(__name__)

    def expected_calibration_error(self, 
                                 predictions: torch.Tensor, 
                                 targets: torch.Tensor,
                                 confidences: torch.Tensor) -> Tuple[float, List[Dict[str, float]]]:
        """
        Expected Calibration Error (ECE).

        NOTE(review): this is a regression-flavoured ECE — each bin's mean
        sigmoid-confidence is compared against its mean *squared error*.
        Those two quantities are not on the same scale, and the reported
        'accuracy' (1 - mean error) is not bounded to [0, 1]; confirm this
        is the intended definition before relying on absolute ECE values.

        Args:
            predictions: predicted values, shape (batch_size, D)
                (the original docstring said D == 4 — verify against callers)
            targets: ground-truth values, same shape as predictions
            confidences: raw confidence logits, shape (batch_size, 1);
                a sigmoid is applied before binning
            
        Returns:
            Tuple of (ECE value, list of per-bin info dicts)
        """
        # Per-sample mean squared error across output dimensions.
        errors = torch.mean((predictions - targets) ** 2, dim=1, keepdim=True)
        
        # Map raw confidence logits into [0, 1].
        confidences_norm = torch.sigmoid(confidences)
        
        # Equal-width bin edges over [0, 1].
        bin_boundaries = torch.linspace(0, 1, self.n_bins + 1)
        bin_lowers = bin_boundaries[:-1]
        bin_uppers = bin_boundaries[1:]
        
        ece = torch.tensor(0.0, device=confidences_norm.device)
        bin_info = []

        for bin_lower, bin_upper in zip(bin_lowers, bin_uppers):
            # Samples whose confidence falls in (lower, upper].
            # NOTE(review): a confidence of exactly 0.0 falls into no bin.
            in_bin = (confidences_norm > bin_lower) & (confidences_norm <= bin_upper)
            prop_in_bin = in_bin.float().mean()

            if prop_in_bin > 0:
                # Mean confidence and mean error within this bin.
                avg_confidence_in_bin = confidences_norm[in_bin].mean()
                avg_error_in_bin = errors[in_bin].mean()

                # ECE contribution, weighted by the bin's sample share.
                ece += torch.abs(avg_confidence_in_bin - avg_error_in_bin) * prop_in_bin
                
                bin_info.append({
                    'bin_lower': bin_lower.item(),
                    'bin_upper': bin_upper.item(),
                    'count': in_bin.sum().item(),
                    'avg_confidence': avg_confidence_in_bin.item(),
                    'avg_error': avg_error_in_bin.item(),
                    'accuracy': (1 - avg_error_in_bin).item()
                })
        
        return ece.item(), bin_info
    
    def reliability_diagram(self,
                          predictions: torch.Tensor,
                          targets: torch.Tensor, 
                          confidences: torch.Tensor,
                          save_path: Optional[str] = None) -> None:
        """
        Plot a reliability diagram (confidence vs. accuracy, per bin).

        NOTE(review): calls ``plt.show()`` unconditionally, which blocks
        under interactive matplotlib backends — confirm that is acceptable
        for the intended (presumably offline-evaluation) callers.

        Args:
            predictions: predicted values
            targets: ground-truth values
            confidences: raw confidence logits
            save_path: optional path to save the figure (PNG, 300 dpi)
        """
        ece, bin_info = self.expected_calibration_error(predictions, targets, confidences)
        
        # Unpack per-bin statistics (only non-empty bins are present).
        bin_lowers = [info['bin_lower'] for info in bin_info]
        bin_uppers = [info['bin_upper'] for info in bin_info]
        avg_confidences = [info['avg_confidence'] for info in bin_info]
        accuracies = [info['accuracy'] for info in bin_info]
        counts = [info['count'] for info in bin_info]
        
        # Bin midpoints used as x positions.
        bin_centers = [(lower + upper) / 2 for lower, upper in zip(bin_lowers, bin_uppers)]
        
        # Create the figure.
        plt.figure(figsize=(10, 6))
        
        # Calibration curve vs. the perfect-calibration diagonal.
        plt.plot([0, 1], [0, 1], 'k--', label='Perfect Calibration')
        plt.plot(bin_centers, accuracies, 'bo-', label='Model', linewidth=2, markersize=8)
        
        # Secondary axis: per-bin sample counts as bars.
        ax2 = plt.gca().twinx()
        ax2.bar(bin_centers, counts, width=0.1, alpha=0.3, color='gray', label='Sample Count')
        ax2.set_ylabel('Sample Count', fontsize=12)
        ax2.set_ylim(0, max(counts) * 1.2 if counts else 1)
        
        # Axis labels, title, legend.
        plt.xlabel('Confidence', fontsize=12)
        plt.ylabel('Accuracy', fontsize=12)
        plt.title(f'Reliability Diagram (ECE = {ece:.4f})', fontsize=14)
        plt.legend(loc='upper left')
        plt.grid(True, alpha=0.3)
        plt.xlim(0, 1)
        plt.ylim(0, 1)
        
        # Optionally persist the figure before showing it.
        if save_path:
            plt.savefig(save_path, dpi=300, bbox_inches='tight')
            self.logger.info(f"可靠性图表已保存到: {save_path}")
        
        plt.show()
    
    def sharpness(self, confidences: torch.Tensor) -> float:
        """
        Sharpness of the confidence distribution.

        Args:
            confidences: raw confidence logits
            
        Returns:
            Standard deviation of the sigmoid-normalized confidences
        """
        confidences_norm = torch.sigmoid(confidences)
        return torch.std(confidences_norm).item()


class PADMetrics:
    """High-level evaluation wrapper for PAD delta predictions.

    Combines the generic regression metrics with PAD-specific directional
    measures (cosine similarity / angular error of the predicted ΔPAD
    vector) and can render a plain-text evaluation report.
    """

    def __init__(self):
        self.regression_metrics = RegressionMetrics()
        self.calibration_metrics = CalibrationMetrics()
        self.logger = logging.getLogger(__name__)

    def evaluate_predictions(self,
                           predictions: torch.Tensor,
                           targets: torch.Tensor,
                           component_names: List[str] = None) -> Dict[str, Any]:
        """
        Run the full evaluation over a batch of predictions.

        Args:
            predictions: predicted values, shape (batch_size, D) or (D,)
            targets: ground-truth values, same shape as predictions
            component_names: per-dimension names (defaults to the 3 PAD deltas)

        Returns:
            Nested dict with 'regression' metrics, top-level 'r2_robust' /
            'r2_mean' shortcuts, and 'pad_specific' directional statistics.
        """
        names = (['delta_pad_p', 'delta_pad_a', 'delta_pad_d']
                 if component_names is None else component_names)

        # Promote 1-D inputs to a batch of one.
        preds = predictions.unsqueeze(0) if predictions.dim() == 1 else predictions
        trues = targets.unsqueeze(0) if targets.dim() == 1 else targets

        regression = self.regression_metrics.compute_all_metrics(preds, trues, names)

        # Directional agreement of the first three (P, A, D) dimensions:
        # cosine similarity, and the corresponding angle in degrees
        # (clamped before acos for numerical safety).
        vec_pred = preds[:, :3]
        vec_true = trues[:, :3]
        cos_sim = F.cosine_similarity(vec_pred, vec_true, dim=1)
        angle_error = torch.acos(torch.clamp(cos_sim, -1 + 1e-8, 1 - 1e-8)) * 180 / np.pi

        return {
            'regression': regression,
            # Surface both R² flavours at the top level for convenience.
            'r2_robust': regression['overall']['r2_robust'],
            'r2_mean': regression['overall']['r2'],
            'pad_specific': {
                'cosine_similarity_mean': cos_sim.mean().item(),
                'cosine_similarity_std': cos_sim.std().item(),
                'angle_error_mean': angle_error.mean().item(),
                'angle_error_std': angle_error.std().item()
            },
        }

    def evaluate_predictions_diagnostic(self,
                                       predictions: torch.Tensor,
                                       targets: torch.Tensor,
                                       component_names: List[str] = None) -> Dict[str, Any]:
        """
        Diagnostic-mode evaluation: print the per-dimension table, then
        return the full evaluation results.

        Args:
            predictions: predicted values
            targets: ground-truth values
            component_names: per-dimension names

        Returns:
            Same dict as :meth:`evaluate_predictions`.
        """
        self.regression_metrics.print_diagnostic_metrics(predictions, targets, component_names)
        return self.evaluate_predictions(predictions, targets, component_names)

    def generate_evaluation_report(self,
                                 predictions: torch.Tensor,
                                 targets: torch.Tensor,
                                 save_path: Optional[str] = None) -> str:
        """
        Build a human-readable evaluation report.

        Args:
            predictions: predicted values
            targets: ground-truth values
            save_path: optional path to write the report to (UTF-8)

        Returns:
            The report text.
        """
        results = self.evaluate_predictions(predictions, targets)
        overall = results['regression']['overall']

        # Header plus the overall regression section.
        lines = [
            "=" * 60,
            "PAD预测器评估报告",
            "=" * 60,
            "\n1. 整体回归指标:",
            f"   MAE:       {overall['mae']:.6f}",
            f"   RMSE:      {overall['rmse']:.6f}",
            f"   R² (平均): {overall['r2']:.6f}",
            f"   R² (稳健): {overall['r2_robust']:.6f}  ← 所有维度总方差比",
            f"   MAPE:      {overall['mape']:.6f}",
            "\n2. 各组件回归指标:",
        ]

        # One sub-section per output component.
        for comp_name, comp_stats in results['regression']['components'].items():
            lines.append(f"   {comp_name}:")
            lines.append(f"     MAE:  {comp_stats['mae']:.6f}")
            lines.append(f"     RMSE: {comp_stats['rmse']:.6f}")
            lines.append(f"     R²:   {comp_stats['r2']:.6f}")

        # Calibration metrics are intentionally omitted: confidence is no
        # longer a model output dimension (it is derived via MC Dropout).

        pad_specific = results['pad_specific']
        lines.append("\n3. PAD特定指标:")
        lines.append(f"   余弦相似度 (均值±标准差): {pad_specific['cosine_similarity_mean']:.4f} ± {pad_specific['cosine_similarity_std']:.4f}")
        lines.append(f"   角度误差 (均值±标准差):   {pad_specific['angle_error_mean']:.2f}° ± {pad_specific['angle_error_std']:.2f}°")
        lines.append("\n" + "=" * 60)

        report_text = "\n".join(lines)

        # Optionally persist the report.
        if save_path:
            with open(save_path, 'w', encoding='utf-8') as f:
                f.write(report_text)
            self.logger.info(f"评估报告已保存到: {save_path}")

        return report_text


def create_metrics(metric_type: str = 'pad', **kwargs) -> Any:
    """
    Factory for evaluation-metric objects.

    Args:
        metric_type: one of 'regression', 'calibration', 'pad'
        **kwargs: forwarded to the metric constructor (only 'calibration'
            currently accepts any, e.g. n_bins)

    Returns:
        A metrics instance of the requested type.

    Raises:
        ValueError: if metric_type is not one of the supported names.
    """
    if metric_type == 'pad':
        return PADMetrics()
    if metric_type == 'regression':
        return RegressionMetrics()
    if metric_type == 'calibration':
        return CalibrationMetrics(**kwargs)
    raise ValueError(f"不支持的指标类型: {metric_type}")


if __name__ == "__main__":
    # Smoke test / demo for the metrics in this module.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    
    # Synthetic data: 3 PAD delta dims + 1 extra dim + 1 confidence logit.
    batch_size = 100
    predictions = torch.randn(batch_size, 5).to(device)
    targets = torch.randn(batch_size, 5).to(device)
    
    print("测试评估指标:")
    print(f"输入形状: {predictions.shape}")
    
    # Regression metrics.
    regression_metrics = RegressionMetrics()
    regression_results = regression_metrics.compute_all_metrics(predictions, targets)
    
    print(f"\n整体回归指标:")
    for key, value in regression_results['overall'].items():
        print(f"  {key}: {value:.6f}")
    
    # Calibration metrics: treat the last column as a confidence logit.
    calibration_metrics = CalibrationMetrics(n_bins=10)
    pred_components = predictions[:, :4]
    target_components = targets[:, :4]
    pred_confidence = predictions[:, 4:5]
    
    ece, bin_info = calibration_metrics.expected_calibration_error(
        pred_components, target_components, pred_confidence
    )
    print(f"\nECE: {ece:.6f}")
    
    # PAD metrics.
    pad_metrics = PADMetrics()
    full_results = pad_metrics.evaluate_predictions(predictions, targets)
    
    # BUG FIX: evaluate_predictions() does not return a 'calibration' entry
    # (confidence was removed from the model output), so indexing
    # full_results['calibration'] raised KeyError here. Report the values
    # computed above via CalibrationMetrics instead.
    print(f"\n校准指标:")
    print(f"  ECE: {ece:.6f}")
    print(f"  Sharpness: {calibration_metrics.sharpness(pred_confidence):.6f}")
    
    print(f"\nPAD特定指标:")
    pad_specific = full_results['pad_specific']
    for key, value in pad_specific.items():
        print(f"  {key}: {value:.6f}")
    
    # Generate the full text report.
    report = pad_metrics.generate_evaluation_report(predictions, targets)
    print(f"\n评估报告:")
    print(report)
    
    print("\n评估指标测试完成!")