EASI-Leaderboard / src /prepare.py
yangzhitao
fix: simplify prepare_space function by removing global flag and ensuring data downloads are always executed
b5bc528
raw
history blame
3.4 kB
import sys
import typing
from functools import lru_cache
from pathlib import Path
from huggingface_hub import snapshot_download
from loguru import logger
from src.envs import settings
from src.schemas.display_toml import DisplayToml
from src.schemas.meta_toml import MetaToml
from src.utils.hf import restart_space
if typing.TYPE_CHECKING:
from src.schemas.meta_toml import MetaToml_Benchmark
if sys.version_info < (3, 11):
from tomli import load as toml_load
else:
from tomllib import load as toml_load
# lru_cache memoises the (argument-less) call, so the downloads below run at
# most once per process no matter how many loaders call prepare_space().
@lru_cache(maxsize=1)
def prepare_space():
    """Initialise the Space by running every dataset download step once.

    Calls ``download_results()`` and then ``download_queue()``. Subsequent
    calls return the cached ``None`` without downloading again.
    """
    for download_step in (download_results, download_queue):
        download_step()
def download_results():
    """Download the evaluation results dataset.

    Fetches ``settings.RESULTS_REPO_ID`` (a ``dataset`` repo) into
    ``settings.EVAL_RESULTS_PATH``, restricted to ``leaderboard/*.toml`` and
    ``leaderboard/**/*.json``.

    Any exception is logged and answered with
    ``restart_space(settings.REPO_ID)``; it is not re-raised.
    """
    # NOTE: the bodies of download_results/download_queue used to be swapped
    # (each fetched the other's repository). Each function now fetches the
    # dataset its name advertises.
    try:
        snapshot_download(
            repo_id=settings.RESULTS_REPO_ID,
            local_dir=settings.EVAL_RESULTS_PATH,
            repo_type="dataset",
            tqdm_class=None,
            etag_timeout=30,
            allow_patterns=["leaderboard/*.toml", "leaderboard/**/*.json"],
            token=settings.HF_TOKEN.get_secret_value(),
        )
        logger.info("Downloaded eval results")
    except Exception as e:
        logger.error(f"Error downloading eval results: {e!s}")
        restart_space(settings.REPO_ID)


def download_queue():
    """Download the evaluation request queue dataset.

    Fetches ``settings.QUEUE_REPO_ID`` (a ``dataset`` repo) into
    ``settings.EVAL_REQUESTS_PATH`` with no file-pattern filter.

    Any exception is logged and answered with
    ``restart_space(settings.REPO_ID)``; it is not re-raised.
    """
    try:
        snapshot_download(
            repo_id=settings.QUEUE_REPO_ID,
            local_dir=settings.EVAL_REQUESTS_PATH,
            repo_type="dataset",
            tqdm_class=None,
            etag_timeout=30,
            token=settings.HF_TOKEN.get_secret_value(),
        )
        logger.info("Downloaded eval queue")
    except Exception as e:
        logger.error(f"Error downloading eval queue: {e!s}")
        restart_space(settings.REPO_ID)
@lru_cache(maxsize=1)
def load_meta_toml() -> "MetaToml":
    """Return the validated contents of ``leaderboard/meta.toml``.

    Calls ``prepare_space()`` first, then reads the file found under
    ``settings.EVAL_RESULTS_PATH`` and validates it with
    ``MetaToml.model_validate``. The result is memoised by ``lru_cache``, so
    the file is parsed only on the first call.
    """
    prepare_space()

    meta_toml_path = Path(settings.EVAL_RESULTS_PATH).joinpath("leaderboard", "meta.toml")
    logger.info(f'Loading meta.toml from: {meta_toml_path.as_posix()!r}')

    # TOML parsers require a binary file handle.
    with meta_toml_path.open("rb") as stream:
        parsed = toml_load(stream)

    meta_toml = MetaToml.model_validate(parsed)
    logger.info("Loaded meta.toml")
    assert meta_toml is not None, f"Failed to load meta.toml: {meta_toml_path.as_posix()!r}"
    return meta_toml
@lru_cache(maxsize=1)
def load_display_toml() -> "DisplayToml":
    """Return the validated contents of ``leaderboard/display.toml``.

    Calls ``prepare_space()`` first, then reads the file found under
    ``settings.EVAL_RESULTS_PATH`` and validates it with
    ``DisplayToml.model_validate``. The result is memoised by ``lru_cache``,
    so the file is parsed only on the first call.
    """
    prepare_space()

    display_toml_path = Path(settings.EVAL_RESULTS_PATH).joinpath("leaderboard", "display.toml")
    logger.info(f'Loading display.toml from: {display_toml_path.as_posix()!r}')

    # TOML parsers require a binary file handle.
    with display_toml_path.open("rb") as stream:
        parsed = toml_load(stream)

    display_toml = DisplayToml.model_validate(parsed)
    logger.info("Loaded display.toml")
    assert display_toml is not None, f"Failed to load display.toml: {display_toml_path.as_posix()!r}"
    return display_toml
@lru_cache(maxsize=1)
def get_benchmarks() -> list["MetaToml_Benchmark"]:
    """Return the enabled benchmarks, arranged by the configured display order.

    Benchmarks whose ``disabled`` flag is set are dropped. Keys listed in
    ``display_toml.benchmarks_order`` come first, in that order; unknown or
    repeated keys are ignored. Enabled benchmarks not mentioned in the order
    list are appended afterwards in their ``meta.toml`` order. The list is
    memoised by ``lru_cache``.
    """
    meta = load_meta_toml()
    display = load_display_toml()

    # Enabled benchmarks that still have to be placed, keyed by benchmark key.
    pending = {bench.key: bench for bench in meta.benchmarks if not bench.disabled}

    benchmarks = []
    for ordered_key in display.benchmarks_order:
        if ordered_key not in pending:
            # Unknown, disabled, or already placed earlier in the order list.
            continue
        benchmarks.append(pending.pop(ordered_key))

    # Whatever was not explicitly ordered keeps its original relative order.
    benchmarks += pending.values()

    logger.info(f"Loaded {len(benchmarks)} benchmarks: titles={[b.title for b in benchmarks]!r}")
    return benchmarks