leaderboard

Running on CPU Upgrade

nan committed on Oct 17, 2024

Commit

0af261c

1 Parent(s): 729aa2a

test: add unit tests for envs

Files changed (2) hide show

src/envs.py CHANGED Viewed

@@ -6,7 +6,7 @@ from huggingface_hub import HfApi
 # ----------------------------------
 TOKEN = os.environ.get("TOKEN", "")  # A read/write token for your org
-OWNER = "AIR-Bench"  # "nan"  # Change to your org - don't forget to create a results and request dataset, with the correct format!
 # ----------------------------------
 REPO_ID = f"{OWNER}/leaderboard"
@@ -15,7 +15,7 @@ RESULTS_REPO = f"{OWNER}/eval_results"
 # repo for submitting the evaluation
 SEARCH_RESULTS_REPO = f"{OWNER}/search_results"
-# If you setup a cache later, just change HF_HOME
 CACHE_PATH = os.getenv("HF_HOME", ".")
 # Local caches

 # ----------------------------------
 TOKEN = os.environ.get("TOKEN", "")  # A read/write token for your org
+OWNER = "AIR-Bench"  # Change to your org - don't forget to create a results and request dataset, with the correct format!
 # ----------------------------------
 REPO_ID = f"{OWNER}/leaderboard"
 # repo for submitting the evaluation
 SEARCH_RESULTS_REPO = f"{OWNER}/search_results"
+# If you set up a cache later, just change HF_HOME
 CACHE_PATH = os.getenv("HF_HOME", ".")
 # Local caches

tests/src/test_envs.py ADDED Viewed

+from air_benchmark.tasks import BenchmarkTable
+from src.envs import BENCHMARK_VERSION_LIST, DEFAULT_METRIC_QA, DEFAULT_METRIC_LONG_DOC, METRIC_LIST
+def test_benchmark_version_list():
+    leaderboard_versions = frozenset(BENCHMARK_VERSION_LIST)
+    available_versions = frozenset([k for k in BenchmarkTable.keys()])
+    assert leaderboard_versions.issubset(
+        available_versions)
+def test_default_metrics():
+    assert DEFAULT_METRIC_QA in METRIC_LIST
+    assert DEFAULT_METRIC_LONG_DOC in METRIC_LIST