File size: 3,404 Bytes
c85dcc4
3f84332
6c930b9
c85dcc4
 
 
 
 
3f84332
6c930b9
 
3f84332
 
 
 
c85dcc4
 
 
 
 
 
 
b5bc528
 
c85dcc4
 
b5bc528
 
c85dcc4
 
6c930b9
 
 
 
 
 
 
 
 
 
b5bc528
6c930b9
 
3f84332
6c930b9
 
 
 
 
 
 
 
 
 
 
 
 
b5bc528
6c930b9
 
3f84332
c85dcc4
 
 
3f84332
b5bc528
 
c85dcc4
 
 
 
 
 
 
 
c2c3c10
 
 
3f84332
b5bc528
 
c2c3c10
 
 
 
 
 
 
 
3f84332
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import sys
import typing
from functools import lru_cache
from pathlib import Path

from huggingface_hub import snapshot_download
from loguru import logger

from src.envs import settings
from src.schemas.display_toml import DisplayToml
from src.schemas.meta_toml import MetaToml
from src.utils.hf import restart_space

if typing.TYPE_CHECKING:
    from src.schemas.meta_toml import MetaToml_Benchmark

if sys.version_info < (3, 11):
    from tomli import load as toml_load
else:
    from tomllib import load as toml_load


# Cached so that repeated calls do not download the same data again
@lru_cache(maxsize=1)
def prepare_space():
    """Initialise the Space by running both download steps in order."""
    for fetch in (download_results, download_queue):
        fetch()


def download_results():
    """Download the evaluation results dataset to ``settings.EVAL_RESULTS_PATH``.

    Snapshots ``settings.RESULTS_REPO_ID`` (a Hub dataset repo), restricted to
    the leaderboard TOML files and the JSON files below ``leaderboard/``.

    Any exception is logged and ``restart_space(settings.REPO_ID)`` is called
    instead of propagating the error; the function then returns ``None``.
    """
    try:
        snapshot_download(
            repo_id=settings.RESULTS_REPO_ID,
            local_dir=settings.EVAL_RESULTS_PATH,
            repo_type="dataset",
            tqdm_class=None,
            etag_timeout=30,
            # Only the leaderboard metadata and result files are fetched.
            allow_patterns=["leaderboard/*.toml", "leaderboard/**/*.json"],
            token=settings.HF_TOKEN.get_secret_value(),
        )
        logger.info("Downloaded eval results")
    except Exception as e:
        # Deliberately broad: any failure here hands control to restart_space.
        logger.error(f"Error downloading eval results: {e!s}")
        restart_space(settings.REPO_ID)


def download_queue():
    """Download the evaluation request queue to ``settings.EVAL_REQUESTS_PATH``.

    Snapshots the whole ``settings.QUEUE_REPO_ID`` dataset repo.

    Any exception is logged and ``restart_space(settings.REPO_ID)`` is called
    instead of propagating the error; the function then returns ``None``.
    """
    try:
        snapshot_download(
            repo_id=settings.QUEUE_REPO_ID,
            local_dir=settings.EVAL_REQUESTS_PATH,
            repo_type="dataset",
            tqdm_class=None,
            etag_timeout=30,
            token=settings.HF_TOKEN.get_secret_value(),
        )
        logger.info("Downloaded eval queue")
    except Exception as e:
        # Deliberately broad: any failure here hands control to restart_space.
        logger.error(f"Error downloading eval queue: {e!s}")
        restart_space(settings.REPO_ID)


@lru_cache(maxsize=1)
def load_meta_toml() -> "MetaToml":
    """Read and validate ``leaderboard/meta.toml`` from the results directory.

    ``prepare_space()`` is called first, then the file below
    ``settings.EVAL_RESULTS_PATH`` is parsed as TOML and validated with
    ``MetaToml.model_validate``. The returned model is cached, so later calls
    reuse it instead of re-reading the file.
    """
    prepare_space()

    toml_file = Path(settings.EVAL_RESULTS_PATH).joinpath("leaderboard", "meta.toml")
    shown_path = toml_file.as_posix()
    logger.info(f'Loading meta.toml from: {shown_path!r}')

    # TOML parsers require a binary file handle.
    with toml_file.open("rb") as fh:
        raw = toml_load(fh)

    parsed = MetaToml.model_validate(raw)
    logger.info("Loaded meta.toml")
    assert parsed is not None, f"Failed to load meta.toml: {shown_path!r}"
    return parsed


@lru_cache(maxsize=1)
def load_display_toml() -> "DisplayToml":
    """Read and validate ``leaderboard/display.toml`` from the results directory.

    ``prepare_space()`` is called first, then the file below
    ``settings.EVAL_RESULTS_PATH`` is parsed as TOML and validated with
    ``DisplayToml.model_validate``. The returned model is cached, so later
    calls reuse it instead of re-reading the file.
    """
    prepare_space()

    toml_file = Path(settings.EVAL_RESULTS_PATH).joinpath("leaderboard", "display.toml")
    shown_path = toml_file.as_posix()
    logger.info(f'Loading display.toml from: {shown_path!r}')

    # TOML parsers require a binary file handle.
    with toml_file.open("rb") as fh:
        raw = toml_load(fh)

    parsed = DisplayToml.model_validate(raw)
    logger.info("Loaded display.toml")
    assert parsed is not None, f"Failed to load display.toml: {shown_path!r}"
    return parsed


@lru_cache(maxsize=1)
def get_benchmarks() -> list["MetaToml_Benchmark"]:
    """Return the enabled benchmarks arranged in display order.

    Benchmarks flagged ``disabled`` in meta.toml are dropped. Those whose key
    appears in ``benchmarks_order`` of display.toml come first, in that order;
    the remaining enabled benchmarks follow in their meta.toml order. Keys in
    ``benchmarks_order`` with no enabled benchmark are ignored. The resulting
    list is cached and shared between callers.
    """
    meta = load_meta_toml()
    display = load_display_toml()

    # Enabled benchmarks that have not yet been placed, keyed by benchmark key.
    pending = {}
    for bench in meta.benchmarks:
        if not bench.disabled:
            pending[bench.key] = bench

    # Popping each key guarantees a benchmark is placed at most once.
    ordered = [pending.pop(key) for key in display.benchmarks_order if key in pending]
    # Whatever display.toml did not mention goes last.
    ordered += pending.values()

    titles = [bench.title for bench in ordered]
    logger.info(f"Loaded {len(ordered)} benchmarks: titles={titles!r}")
    return ordered