| from __future__ import annotations |
|
|
| from pathlib import Path |
| from typing import Any |
|
|
| import pandas as pd |
|
|
| from .paths import ensure_dir |
| from .utils import model_file_size_mb, save_json |
|
|
|
|
| RANK_COLUMNS = ["f1", "roc_auc", "balanced_accuracy"] |
|
|
|
|
| def rank_models(metrics_df: pd.DataFrame, config: dict[str, Any]) -> pd.DataFrame: |
| if metrics_df.empty: |
| return pd.DataFrame() |
| candidates = metrics_df[metrics_df["split"] == "test"].copy() |
| if candidates.empty: |
| candidates = metrics_df[metrics_df["split"] == "val"].copy() |
| for col in RANK_COLUMNS: |
| candidates[col] = pd.to_numeric(candidates[col], errors="coerce").fillna(0.0) |
| if "model_size_mb" not in candidates: |
| candidates["model_size_mb"] = candidates["model_path"].apply(model_file_size_mb) |
| candidates["model_size_mb"] = pd.to_numeric(candidates["model_size_mb"], errors="coerce").fillna(1e9) |
| if "avg_inference_ms" not in candidates: |
| candidates["avg_inference_ms"] = 1e9 |
| candidates["avg_inference_ms"] = pd.to_numeric(candidates["avg_inference_ms"], errors="coerce").fillna(1e9) |
| leaderboard = candidates.sort_values( |
| by=["f1", "roc_auc", "balanced_accuracy", "model_size_mb", "avg_inference_ms"], |
| ascending=[False, False, False, True, True], |
| ).reset_index(drop=True) |
| leaderboard.insert(0, "rank", range(1, len(leaderboard) + 1)) |
| output_dir = ensure_dir(config["paths"]["output_dir"]) |
| leaderboard_path = output_dir / "leaderboard.csv" |
| leaderboard.to_csv(leaderboard_path, index=False) |
| save_json(leaderboard.to_dict(orient="records"), output_dir / "leaderboard.json") |
| if not leaderboard.empty: |
| best = leaderboard.iloc[0].to_dict() |
| save_json(best, output_dir / "best_model.json") |
| return leaderboard |
|
|
|
|
| def load_best_model_record(config: dict[str, Any]) -> dict[str, Any]: |
| from .utils import load_json |
|
|
| path = Path(config["paths"]["output_dir"]) / "best_model.json" |
| if not path.exists(): |
| raise FileNotFoundError( |
| f"Best model record not found at {path}. Run training and evaluation first." |
| ) |
| record = load_json(path) |
| model_path = record.get("model_path") |
| if model_path and not Path(str(model_path)).exists(): |
| candidate = Path(config["paths"]["model_dir"]) / Path(str(model_path)).name |
| if candidate.exists(): |
| record["model_path"] = str(candidate) |
| return record |
|
|