azulgarza's picture
feat: add style changes and cleaning
7901bf8
raw
history blame contribute delete
951 Bytes
"""Data loading for the Impermanent Leaderboard."""
import logging
import pandas as pd
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# S3 URI of the evaluation-results parquet file read by ``load_data``.
DATA_URL = "s3://impermanent-benchmark/v0.1.0/gh-archive/evaluations/evaluation_results.parquet"
def load_data() -> pd.DataFrame:
    """Load evaluation results from the parquet file at ``DATA_URL``.

    If the parquet contains a ``model_alias`` column it is used as the
    display ``model`` name (the raw identifier is kept as ``model_id``).

    NOTE(review): this docstring previously described a fallback to a
    local ``mock_evaluation_results.csv`` when the remote file is not
    reachable. No such fallback is implemented here; any error raised by
    ``pd.read_parquet`` propagates to the caller.

    Returns:
        The loaded DataFrame. It is expected (not validated here) to
        carry the columns
        ``dataset, subdataset, frequency, cutoff, metric, model, value``.
    """
    df = pd.read_parquet(DATA_URL)
    logger.info("Loaded %d rows from %s", len(df), DATA_URL)

    if "model_alias" in df.columns:
        # Show the human-friendly alias as ``model``; keep the raw
        # identifier available under ``model_id``.
        df = df.rename(columns={"model": "model_id", "model_alias": "model"})

    # Debug-level preview instead of an unconditional print to stdout;
    # the frame is only rendered when DEBUG logging is enabled.
    logger.debug("Evaluation results preview:\n%s", df)
    return df