File size: 951 Bytes
46dbc41
 
 
 
 
 
 
 
7898153
46dbc41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7901bf8
 
46dbc41
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
"""Data loading for the Impermanent Leaderboard."""

import logging

import pandas as pd

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# S3 location of the evaluation-results parquet file read by ``load_data``.
DATA_URL = "s3://impermanent-benchmark/v0.1.0/gh-archive/evaluations/evaluation_results.parquet"


def load_data() -> pd.DataFrame:
    """Load evaluation results from S3.

    Reads the parquet file at ``DATA_URL``. There is no local fallback:
    if the remote file cannot be read (e.g. no network, missing
    credentials), the error raised by ``pd.read_parquet`` propagates to
    the caller.

    If the parquet contains a ``model_alias`` column it is used as the
    display ``model`` name (the raw identifier is kept as ``model_id``).

    Returns:
        DataFrame as stored in the parquet file, after the optional
        alias rename. The expected columns are
        ``dataset, subdataset, frequency, cutoff, metric, model, value``
        (not validated here).

    Raises:
        Exception: Whatever ``pd.read_parquet`` raises when the file
            cannot be fetched or parsed.
    """
    df = pd.read_parquet(DATA_URL)
    logger.info("Loaded %d rows from %s", len(df), DATA_URL)

    if "model_alias" in df.columns:
        # Promote the alias to the display column and keep the raw
        # identifier available under ``model_id``.
        df = df.rename(columns={"model": "model_id", "model_alias": "model"})

    # Log a short preview at debug level instead of printing the whole
    # frame to stdout on every load.
    logger.debug("Evaluation results preview:\n%s", df.head())

    return df