| """Data loading for the Impermanent Leaderboard.""" | |
| import logging | |
| import pandas as pd | |
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Remote parquet file that ``load_data`` reads the evaluation results from.
DATA_URL = "s3://impermanent-benchmark/v0.1.0/gh-archive/evaluations/evaluation_results.parquet"
def load_data() -> pd.DataFrame:
    """Load evaluation results from the parquet file at ``DATA_URL``.

    If the parquet contains a ``model_alias`` column it is used as the
    display ``model`` name; the original ``model`` column, if present, is
    kept under the name ``model_id``.

    NOTE(review): this docstring previously promised a fallback to a local
    ``mock_evaluation_results.csv`` when the remote file is not reachable.
    No such fallback is implemented in this function, so any error raised
    by ``pd.read_parquet`` propagates to the caller. Either implement the
    fallback or keep this description.

    Returns:
        The loaded DataFrame. It is expected to have the columns
        ``dataset, subdataset, frequency, cutoff, metric, model, value``;
        the schema is not validated here.
    """
    df = pd.read_parquet(DATA_URL)
    logger.info("Loaded %d rows from %s", len(df), DATA_URL)

    if "model_alias" in df.columns:
        # Expose the alias as the display name and keep the raw identifier
        # available under ``model_id``.
        df = df.rename(columns={"model": "model_id", "model_alias": "model"})

    return df