Spaces:

MetabonetBench
/

leaderboard-space

Running

File size: 10,207 Bytes

import logging
from pathlib import Path

import pandas as pd
from huggingface_hub import HfApi, hf_hub_download
from huggingface_hub.utils import EntryNotFoundError

from config import (
    ACTIVATED_COL,
    DATASET_REPO_ID,
    DEFAULT_LEADERBOARD_CONTEXT,
    HF_TOKEN,
    HORIZONS,
    LEADERBOARD_METADATA_SUFFIX,
    LEADERBOARD_METRICS_SUFFIX,
    MAX_SAVED_SUBMISSIONS,
    METRIC_BASE_COLS,
    get_ground_truth_file,
    get_leaderboard_entry_dir,
)

# Columns of the one-row, per-user metadata frame (see build_user_metadata_df).
LEADERBOARD_METADATA_COLS = ["Website", "Notes"]
# One "<metric>_<horizon>" column per (horizon, metric) pair; columns are
# grouped by horizon because the horizon loop is the outer one.
LEADERBOARD_METRIC_COLS = [f"{metric}_{horizon}" for horizon in HORIZONS for metric in METRIC_BASE_COLS]
# Schema of a per-user submission-history frame: the metric columns followed by
# the submission timestamp and the activation flag.
LEADERBOARD_METRICS_WITH_HISTORY_COLS = [*LEADERBOARD_METRIC_COLS, "Timestamp", ACTIVATED_COL]
# Leaderboard columns that _fill_missing_columns fills with "N/A" and casts to str.
STRING_LEADERBOARD_COLS = ["Website", "Notes", "Timestamp"]
# Column order of the assembled leaderboard frame.
ALL_COLUMNS = ["User", "Website", *LEADERBOARD_METRIC_COLS, "Timestamp", "Notes"]
# Metadata returned by get_user_metadata when no token or no metadata rows are available.
EMPTY_METADATA = {"Website": "N/A", "Notes": "N/A"}
# Module-level logger (used by get_leaderboard to report fetch failures).
logger = logging.getLogger(__name__)


def get_user_metadata_filename(username: str) -> str:
    """Return the metadata file name for ``username``.

    The name is the username immediately followed by
    ``LEADERBOARD_METADATA_SUFFIX``.
    """
    return "{}{}".format(username, LEADERBOARD_METADATA_SUFFIX)


def get_user_metrics_filename(username: str) -> str:
    """Return the metrics-history file name for ``username``.

    The name is the username immediately followed by
    ``LEADERBOARD_METRICS_SUFFIX``.
    """
    return "{}{}".format(username, LEADERBOARD_METRICS_SUFFIX)


def get_user_metadata_repo_path(
    username: str,
    context: str = DEFAULT_LEADERBOARD_CONTEXT,
) -> str:
    """Return the in-repo path of a user's metadata file for ``context``.

    The path is ``<entry dir for context>/<metadata file name>``.
    """
    entry_dir = get_leaderboard_entry_dir(context)
    metadata_filename = get_user_metadata_filename(username)
    return f"{entry_dir}/{metadata_filename}"


def get_user_metrics_repo_path(
    username: str,
    context: str = DEFAULT_LEADERBOARD_CONTEXT,
) -> str:
    """Return the in-repo path of a user's metrics-history file for ``context``.

    The path is ``<entry dir for context>/<metrics file name>``.
    """
    entry_dir = get_leaderboard_entry_dir(context)
    metrics_filename = get_user_metrics_filename(username)
    return f"{entry_dir}/{metrics_filename}"


def build_user_metadata_df(website: str, notes: str) -> pd.DataFrame:
    """Build a one-row frame holding a user's website and notes.

    Columns follow ``LEADERBOARD_METADATA_COLS``.
    """
    record = dict(Website=website, Notes=notes)
    return pd.DataFrame([record], columns=LEADERBOARD_METADATA_COLS)


def build_submission_metrics_row(
    scores: dict[str, float],
    timestamp: str,
    activated: bool = False,
) -> pd.DataFrame:
    """Build a one-row submission-history frame from a score mapping.

    Args:
        scores: Mapping from metric column name to value. Metric columns
            absent from the mapping are stored as NaN; keys that are not
            metric columns are ignored.
        timestamp: Value stored in the ``Timestamp`` column.
        activated: Flag stored (as a ``bool``) in the activation column.

    Returns:
        A single-row frame with columns ``LEADERBOARD_METRICS_WITH_HISTORY_COLS``.
    """
    record = {}
    for metric_col in LEADERBOARD_METRIC_COLS:
        record[metric_col] = scores.get(metric_col, float("nan"))
    record.update({"Timestamp": timestamp, ACTIVATED_COL: bool(activated)})
    return pd.DataFrame([record], columns=LEADERBOARD_METRICS_WITH_HISTORY_COLS)


def _coerce_metric_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` whose metric columns are numeric.

    Only metric columns actually present in ``df`` are converted; values
    that cannot be parsed as numbers become NaN.
    """
    result = df.copy()
    present_metric_cols = [col for col in LEADERBOARD_METRIC_COLS if col in result.columns]
    for metric_col in present_metric_cols:
        result[metric_col] = pd.to_numeric(result[metric_col], errors="coerce")
    return result


def _normalize_activated_col(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` with a strictly boolean activation column.

    When the activation column is missing it is created as all ``False``,
    and the first row (if any) is then flagged ``True``. Existing values are
    compared as stripped, lower-cased text against "true", "1" and "yes";
    anything else, including missing values, becomes ``False``.
    """
    result = df.copy()
    if ACTIVATED_COL not in result.columns:
        result[ACTIVATED_COL] = False
        # The column now exists, so a non-zero row count means "not empty".
        if len(result.index) > 0:
            first_label = result.index[0]
            result.loc[first_label, ACTIVATED_COL] = True

    truthy_tokens = ["true", "1", "yes"]
    as_text = result[ACTIVATED_COL].fillna(False).astype(str)
    cleaned = as_text.str.strip().str.lower()
    result[ACTIVATED_COL] = cleaned.isin(truthy_tokens)
    return result


def normalize_submission_history_df(df: pd.DataFrame) -> pd.DataFrame:
    """Bring a raw submission-history frame into the canonical schema.

    Missing metric columns are added as NaN and a missing ``Timestamp``
    column as "N/A". Metric columns are coerced to numeric, the activation
    column to bool, and timestamps to strings. The result contains exactly
    ``LEADERBOARD_METRICS_WITH_HISTORY_COLS``, sorted by ``Timestamp`` in
    descending order with a fresh index. An empty input yields an empty frame
    with those columns.
    """
    if df.empty:
        return pd.DataFrame(columns=LEADERBOARD_METRICS_WITH_HISTORY_COLS)

    history = df.copy()
    absent_metric_cols = [col for col in LEADERBOARD_METRIC_COLS if col not in history.columns]
    for metric_col in absent_metric_cols:
        history[metric_col] = float("nan")
    if "Timestamp" not in history.columns:
        history["Timestamp"] = "N/A"

    history = _normalize_activated_col(_coerce_metric_columns(history))
    history["Timestamp"] = history["Timestamp"].fillna("N/A").astype(str)

    ordered = history[LEADERBOARD_METRICS_WITH_HISTORY_COLS].sort_values(
        "Timestamp",
        ascending=False,
    )
    return ordered.reset_index(drop=True)


def cap_submission_history_df(df: pd.DataFrame) -> pd.DataFrame:
    """Normalize ``df`` and keep only its first ``MAX_SAVED_SUBMISSIONS`` rows.

    Because normalization sorts by ``Timestamp`` descending, the rows kept are
    the ones that sort first in that order.
    """
    newest_first = normalize_submission_history_df(df)
    capped = newest_first.head(n=MAX_SAVED_SUBMISSIONS)
    return capped.reset_index(drop=True)


def write_user_metrics_history_file(
    output_dir,
    username: str,
    metrics_history_df: pd.DataFrame,
):
    """Write a user's capped submission history to a CSV file.

    Args:
        output_dir: Directory to write into; created (with parents) if needed.
        username: User whose metrics file name is used.
        metrics_history_df: History frame; normalized and capped before writing.

    Returns:
        The ``Path`` of the CSV file that was written.
    """
    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)
    destination = target_dir / get_user_metrics_filename(username)
    capped_history = cap_submission_history_df(metrics_history_df)
    capped_history.to_csv(destination, index=False)
    return destination


def write_user_leaderboard_files(
    output_dir,
    username: str,
    website: str,
    notes: str,
    scores: dict[str, float],
    timestamp: str,
    activated: bool = False,
):
    """Write a user's metadata CSV and a single-submission metrics CSV.

    Args:
        output_dir: Directory to write into; created (with parents) if needed.
        username: User whose file names are used.
        website: Value for the ``Website`` metadata column.
        notes: Value for the ``Notes`` metadata column.
        scores: Mapping from metric column name to value.
        timestamp: Timestamp recorded for the submission.
        activated: Activation flag recorded for the submission.

    Returns:
        A ``(metadata_path, metrics_path)`` tuple of the files written.
    """
    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)
    metadata_path = target_dir / get_user_metadata_filename(username)

    # The metrics file is written first, then the metadata file.
    submission_row = build_submission_metrics_row(scores, timestamp, activated=activated)
    metrics_path = write_user_metrics_history_file(
        output_dir=target_dir,
        username=username,
        metrics_history_df=submission_row,
    )

    metadata_df = build_user_metadata_df(website, notes)
    metadata_df.to_csv(metadata_path, index=False)
    return metadata_path, metrics_path


def get_ground_truth_path(context: str = DEFAULT_LEADERBOARD_CONTEXT) -> str:
    """Download the ground-truth file for ``context`` and return its local path.

    The file name is resolved with ``get_ground_truth_file(context)`` and
    fetched from the dataset repo ``DATASET_REPO_ID`` using ``HF_TOKEN``.

    Args:
        context: Leaderboard context whose ground-truth file is requested.

    Returns:
        The local path returned by ``hf_hub_download``.

    Raises:
        ValueError: If ``HF_TOKEN`` is not configured, or if the download
            fails for any reason (the original error is attached as the cause).
    """
    if not HF_TOKEN:
        raise ValueError("System HF_TOKEN is missing. Please configure Space Secrets.")
    ground_truth_file = get_ground_truth_file(context)
    try:
        return hf_hub_download(
            repo_id=DATASET_REPO_ID,
            filename=ground_truth_file,
            repo_type="dataset",
            token=HF_TOKEN,
        )
    except Exception as e:
        # Chain explicitly so the traceback marks the download error as the
        # direct cause instead of an incidental "during handling" context.
        raise ValueError(f"Failed to load {ground_truth_file}: {str(e)}") from e


def get_ground_truth(context: str = DEFAULT_LEADERBOARD_CONTEXT):
    """Return the ground-truth table for ``context`` read from its parquet file.

    The file is located via ``get_ground_truth_path``, so the same
    ``ValueError`` conditions apply.
    """
    local_path = get_ground_truth_path(context)
    return pd.read_parquet(local_path)


def _empty_leaderboard():
    """Return a row-less leaderboard frame with the ``ALL_COLUMNS`` layout."""
    empty_board = pd.DataFrame(columns=ALL_COLUMNS)
    return empty_board


def _fill_missing_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` with all string and metric columns present.

    String columns (``STRING_LEADERBOARD_COLS``) are created when missing,
    have missing values replaced by "N/A", and are cast to ``str``. Metric
    columns are created as NaN when missing and coerced to numeric.

    Args:
        df: Leaderboard frame to complete. It is not modified.

    Returns:
        A new frame with the filled and coerced columns.
    """
    # Work on a copy so the caller's frame is left untouched, matching the
    # other normalization helpers in this module.
    filled_df = df.copy()

    for col in STRING_LEADERBOARD_COLS:
        if col not in filled_df.columns:
            filled_df[col] = "N/A"
        # astype(str) turns any remaining NaN-like value into the text "nan";
        # map that back to the placeholder as well.
        filled_df[col] = filled_df[col].fillna("N/A").astype(str).replace("nan", "N/A")

    for col in LEADERBOARD_METRIC_COLS:
        if col not in filled_df.columns:
            filled_df[col] = float("nan")

    return _coerce_metric_columns(filled_df)


def _username_from_entry_filename(filename: str) -> str | None:
    """Extract the username from a leaderboard entry file name.

    The metadata suffix is checked before the metrics suffix. Returns the
    file name with the matching suffix removed, or ``None`` when it ends with
    neither suffix.
    """
    for suffix in (LEADERBOARD_METADATA_SUFFIX, LEADERBOARD_METRICS_SUFFIX):
        if filename.endswith(suffix):
            return filename[:-len(suffix)]
    return None


def _get_repo_leaderboard_usernames(
    context: str = DEFAULT_LEADERBOARD_CONTEXT,
) -> list[str]:
    """List the users that have entry files for ``context`` in the dataset repo.

    Every repo file under the context's entry directory is inspected; files
    whose name does not yield a non-empty username are ignored.

    Returns:
        The distinct usernames, sorted.
    """
    repo_files = HfApi().list_repo_files(
        repo_id=DATASET_REPO_ID,
        repo_type="dataset",
        token=HF_TOKEN,
    )
    entry_prefix = f"{get_leaderboard_entry_dir(context)}/"

    found = set()
    for repo_path in repo_files:
        if not repo_path.startswith(entry_prefix):
            continue
        candidate = _username_from_entry_filename(Path(repo_path).name)
        if candidate:
            found.add(candidate)
    return sorted(found)


def _read_repo_csv(repo_path: str) -> pd.DataFrame:
    """Download ``repo_path`` from the dataset repo and parse it as CSV.

    The download is forced (``force_download=True``) on every call.
    """
    download_kwargs = dict(
        repo_id=DATASET_REPO_ID,
        filename=repo_path,
        repo_type="dataset",
        token=HF_TOKEN,
        force_download=True,
    )
    local_file = hf_hub_download(**download_kwargs)
    return pd.read_csv(local_file)


def _read_repo_csv_or_empty(repo_path: str) -> pd.DataFrame:
    """Read a CSV from the dataset repo, or return an empty frame if it is absent.

    Only ``EntryNotFoundError`` is handled; any other error propagates.
    """
    try:
        frame = _read_repo_csv(repo_path)
    except EntryNotFoundError:
        frame = pd.DataFrame()
    return frame


def get_user_metadata(
    username: str,
    context: str = DEFAULT_LEADERBOARD_CONTEXT,
) -> dict[str, str]:
    """Return the stored website and notes for ``username``.

    Args:
        username: User whose metadata file is read.
        context: Leaderboard context the metadata belongs to.

    Returns:
        A dict with the keys of ``EMPTY_METADATA`` ("Website" and "Notes").
        Values are always strings; a field falls back to its
        ``EMPTY_METADATA`` placeholder when ``HF_TOKEN`` is unset, the
        metadata file is missing or has no rows, the column is absent, or the
        stored cell is blank.
    """
    if not HF_TOKEN:
        return EMPTY_METADATA.copy()

    metadata_df = _read_repo_csv_or_empty(get_user_metadata_repo_path(username, context))
    if metadata_df.empty:
        return EMPTY_METADATA.copy()

    metadata_row = metadata_df.iloc[0]
    metadata = {}
    for field, placeholder in EMPTY_METADATA.items():
        value = metadata_row.get(field, placeholder)
        # pandas.read_csv parses blank cells, and the literal "N/A" written as
        # a placeholder, as NaN. Map those back so callers always get strings.
        metadata[field] = placeholder if pd.isna(value) else str(value)
    return metadata


def get_user_submission_history(
    username: str,
    context: str = DEFAULT_LEADERBOARD_CONTEXT,
) -> pd.DataFrame:
    """Return the normalized submission history of ``username`` for ``context``.

    Without ``HF_TOKEN`` nothing is downloaded and an empty frame with the
    history columns is returned. A missing metrics file also yields an empty
    history.
    """
    if HF_TOKEN:
        raw_history = _read_repo_csv_or_empty(get_user_metrics_repo_path(username, context))
        return normalize_submission_history_df(raw_history)
    return pd.DataFrame(columns=LEADERBOARD_METRICS_WITH_HISTORY_COLS)


def _get_active_submission_row(metrics_history_df: pd.DataFrame):
    """Return the first row flagged as activated, or ``None`` if there is none.

    ``None`` is also returned for an empty history frame.
    """
    if metrics_history_df.empty:
        return None
    is_active = metrics_history_df[ACTIVATED_COL]
    active_rows = metrics_history_df[is_active]
    return None if active_rows.empty else active_rows.iloc[0]


def _assemble_leaderboard_row(
    username: str,
    metadata: dict[str, str],
    active_metrics_row,
) -> dict:
    """Combine user metadata and the active submission into one leaderboard row.

    Args:
        username: Value for the ``User`` field.
        metadata: Mapping providing ``Website`` and ``Notes`` ("N/A" if absent).
        active_metrics_row: Object with a ``get(key, default)`` method that
            provides ``Timestamp`` and the metric values.

    Returns:
        A dict with ``User``, ``Website``, ``Notes``, ``Timestamp`` and one
        entry per metric column (NaN when the metric is absent).
    """
    identity_fields = {
        "User": username,
        "Website": metadata.get("Website", "N/A"),
        "Notes": metadata.get("Notes", "N/A"),
        "Timestamp": active_metrics_row.get("Timestamp", "N/A"),
    }
    metric_fields = {
        metric_col: active_metrics_row.get(metric_col, float("nan"))
        for metric_col in LEADERBOARD_METRIC_COLS
    }
    return {**identity_fields, **metric_fields}


def get_leaderboard(context: str = DEFAULT_LEADERBOARD_CONTEXT):
    """Build the current leaderboard for ``context`` from the dataset repo.

    Each user with an activated submission contributes one row; users without
    one are skipped. Rows are not ranked by any metric.

    Returns:
        A frame with the ``ALL_COLUMNS`` layout. It is empty when ``HF_TOKEN``
        is unset, when no user has an activated submission, or when fetching
        fails (the failure is logged as a warning rather than raised).
    """
    if not HF_TOKEN:
        return _empty_leaderboard()

    try:
        rows = []
        for username in _get_repo_leaderboard_usernames(context):
            metadata = get_user_metadata(username, context)
            history = get_user_submission_history(username, context)
            active_row = _get_active_submission_row(history)
            if active_row is not None:
                rows.append(_assemble_leaderboard_row(username, metadata, active_row))

        # Covers both "no users at all" and "no user has an active submission".
        if not rows:
            return _empty_leaderboard()

        board = pd.DataFrame(rows, columns=ALL_COLUMNS)
        return _fill_missing_columns(board)
    except Exception as exc:
        logger.warning("Failed to fetch leaderboard data: %s", exc)
        return _empty_leaderboard()