| import numpy as np | |
| import pandas as pd | |
# NOTE: earlier DataFrame-based version of rank_results, kept for reference;
# superseded by the aggregate_metrics / rank_results implementation below.
# def rank_results(
#     results,
#     metric="f1",
#     clf_average="mean",
#     weights=None
# ):
#     """
#     Build a leaderboard ranking from FSExecutor output results.
#
#     Parameters
#     ----------
#     results : list of dict
#         Each dict holds one algorithm's result on one dataset.
#     metric : str
#         Metric to use: 'f1' or 'auc'.
#     clf_average : str
#         'mean' or 'max' -- how to aggregate across classifiers.
#     weights : dict or None
#         Multi-metric weighting, e.g. {'f1': 0.5, 'auc': 0.5}.
#
#     Returns
#     -------
#     ranked_df : pd.DataFrame
#     """
#     rows = []
#     for res in results:
#         algo = res["algorithm"]
#         metrics = res["metrics"]
#         # --------- single metric ----------
#         if weights is None:
#             vals = []
#             for clf, m in metrics.items():
#                 if metric in m:
#                     vals.append(m[metric])
#             if not vals:
#                 raise ValueError(f"No metric {metric} for {algo}")
#             score = np.mean(vals) if clf_average == "mean" else np.max(vals)
#         # --------- weighted multi-metric ----------
#         else:
#             score = 0.0
#             for m_name, w in weights.items():
#                 vals = [
#                     metrics[clf][m_name]
#                     for clf in metrics
#                     if m_name in metrics[clf]
#                 ]
#                 score += w * np.mean(vals)
#         rows.append({
#             "algorithm": algo,
#             "score": score,
#             "num_features": res["num_features"],
#             "time": res.get("time", None)
#         })
#     df = pd.DataFrame(rows)
#     # --------- sort by score ----------
#     df = df.sort_values(
#         by="score",
#         ascending=False
#     ).reset_index(drop=True)
#     df["rank"] = df.index + 1
#     return df
def aggregate_metrics(metrics, w_f1=0.7, w_auc=0.3):
    """
    Blend per-classifier f1/auc scores into one weighted performance score.

    Parameters
    ----------
    metrics : dict
        Mapping of classifier name to its scores, each entry containing
        both "f1" and "auc" keys, e.g.::

            {
                "nb":  {"f1": x, "auc": y},
                "svm": {"f1": x, "auc": y},
                "rf":  {"f1": x, "auc": y},
            }

    w_f1, w_auc : float
        Weights applied to the mean f1 and mean auc respectively.

    Returns
    -------
    tuple of (weighted_score, mean_f1, mean_auc)

    Raises
    ------
    ValueError
        If *metrics* is empty (the means would be undefined; previously
        this surfaced as an opaque ZeroDivisionError).
    KeyError
        If any classifier entry is missing "f1" or "auc".
    """
    if not metrics:
        raise ValueError("metrics must contain at least one classifier entry")
    f1s = [m["f1"] for m in metrics.values()]
    aucs = [m["auc"] for m in metrics.values()]
    mean_f1 = sum(f1s) / len(f1s)
    mean_auc = sum(aucs) / len(aucs)
    return w_f1 * mean_f1 + w_auc * mean_auc, mean_f1, mean_auc
def rank_results(
    results,
):
    """
    Rank feature-selection results by blended performance score.

    Parameters
    ----------
    results : list[dict]
        Each entry must provide a "metrics" mapping understood by
        aggregate_metrics.

    Returns
    -------
    list[dict]
        New list, sorted by "score" descending.  Every entry is a copy of
        the input dict extended with "mean_f1", "mean_auc", "perf_score"
        and "score" fields.
    """
    scored = []
    for entry in results:
        # Fuse per-classifier metrics into a single performance number.
        perf, f1_avg, auc_avg = aggregate_metrics(entry["metrics"])
        # Penalty terms (feature count, runtime) are intentionally
        # disabled for now, so the final score is just the raw
        # performance score:
        #   final = perf - alpha * num_features - beta * time
        scored.append({
            **entry,
            "mean_f1": f1_avg,
            "mean_auc": auc_avg,
            "score": perf,
            "perf_score": perf,
        })
    # Higher score ranks first; sorted() is stable, matching the
    # original in-place sort for tied scores.
    return sorted(scored, key=lambda item: item["score"], reverse=True)