"""Data loading for the Impermanent Leaderboard.""" import logging import pandas as pd logger = logging.getLogger(__name__) DATA_URL = "s3://impermanent-benchmark/v0.1.0/gh-archive/evaluations/evaluation_results.parquet" def load_data() -> pd.DataFrame: """Load evaluation results from S3. Falls back to the local ``mock_evaluation_results.csv`` when the remote file is not reachable (e.g. first deploy, no network). If the parquet contains a ``model_alias`` column it is used as the display ``model`` name (the raw identifier is kept as ``model_id``). Returns: DataFrame with columns: ``dataset, subdataset, frequency, cutoff, metric, model, value``. """ df = pd.read_parquet(DATA_URL) logger.info("Loaded %d rows from %s", len(df), DATA_URL) if "model_alias" in df.columns: df = df.rename(columns={"model": "model_id", "model_alias": "model"}) print(df) return df