File size: 3,210 Bytes
94e0fc9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import numpy as np
import pandas as pd
# def rank_results(
#     results,
#     metric="f1",
#     clf_average="mean",
#     weights=None
# ):
#     """
#     对 FSExecutor 输出结果进行排行榜排序

#     Parameters
#     ----------
#     results : list of dict
#         每个 dict 是一个算法在一个数据集上的结果
#     metric : str
#         使用的指标: 'f1' or 'auc'
#     clf_average : str
#         'mean' 或 'max',表示跨分类器如何聚合
#     weights : dict or None
#         多指标加权,例如 {'f1':0.5, 'auc':0.5}

#     Returns
#     -------
#     ranked_df : pd.DataFrame
#     """

#     rows = []

#     for res in results:
#         algo = res["algorithm"]
#         metrics = res["metrics"]

#         # --------- 单指标 ----------
#         if weights is None:
#             vals = []
#             for clf, m in metrics.items():
#                 if metric in m:
#                     vals.append(m[metric])

#             if not vals:
#                 raise ValueError(f"No metric {metric} for {algo}")

#             score = np.mean(vals) if clf_average == "mean" else np.max(vals)

#         # --------- 多指标加权 ----------
#         else:
#             score = 0.0
#             for m_name, w in weights.items():
#                 vals = [
#                     metrics[clf][m_name]
#                     for clf in metrics
#                     if m_name in metrics[clf]
#                 ]
#                 score += w * np.mean(vals)

#         rows.append({
#             "algorithm": algo,
#             "score": score,
#             "num_features": res["num_features"],
#             "time": res.get("time", None)
#         })

#     df = pd.DataFrame(rows)

#     # --------- 按 score 排序 ----------
#     df = df.sort_values(
#         by="score",
#         ascending=False
#     ).reset_index(drop=True)

#     df["rank"] = df.index + 1

#     return df

def aggregate_metrics(metrics, w_f1=0.7, w_auc=0.3):
    """
    Aggregate per-classifier metrics into one weighted performance score.

    Parameters
    ----------
    metrics : dict
        Mapping of classifier name to its metric dict, e.g.::

            {
                "nb":  {"f1": x, "auc": y},
                "svm": {"f1": x, "auc": y},
                "rf":  {"f1": x, "auc": y},
            }

        Every entry must provide both "f1" and "auc".
    w_f1 : float
        Weight applied to the mean F1 score (default 0.7).
    w_auc : float
        Weight applied to the mean AUC (default 0.3).

    Returns
    -------
    tuple of (float, float, float)
        (weighted_score, mean_f1, mean_auc).

    Raises
    ------
    ValueError
        If `metrics` is empty (previously this crashed with
        ZeroDivisionError when computing the mean).
    KeyError
        If any classifier entry is missing "f1" or "auc".
    """
    if not metrics:
        raise ValueError("metrics must contain at least one classifier entry")

    f1_scores = [m["f1"] for m in metrics.values()]
    auc_scores = [m["auc"] for m in metrics.values()]

    mean_f1 = sum(f1_scores) / len(f1_scores)
    mean_auc = sum(auc_scores) / len(auc_scores)

    return w_f1 * mean_f1 + w_auc * mean_auc, mean_f1, mean_auc

def rank_results(
    results,
):
    """
    Rank feature-selection results by their aggregated performance score.

    Parameters
    ----------
    results : list[dict]
        Each dict must contain a "metrics" entry accepted by
        `aggregate_metrics`; all other keys are carried through untouched.

    Returns
    -------
    list[dict]
        New dicts (inputs are not mutated), each extended with
        "mean_f1", "mean_auc", "perf_score", and "score", sorted by
        "score" in descending order (higher is better).
    """
    scored = []

    for entry in results:
        # Fuse per-classifier metrics into a single performance value.
        perf, f1_avg, auc_avg = aggregate_metrics(entry["metrics"])

        # The final ranking score is currently the raw performance score
        # (no feature-count or runtime penalty applied).
        scored.append({
            **entry,
            "mean_f1": f1_avg,
            "mean_auc": auc_avg,
            "score": perf,
            "perf_score": perf,
        })

    # Highest score first; sorted() is stable, so ties keep input order.
    return sorted(scored, key=lambda item: item["score"], reverse=True)