Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
test: add unit tests for envs
Browse files
- src/envs.py +2 -2
- tests/src/test_envs.py +15 -0
src/envs.py
CHANGED
|
@@ -6,7 +6,7 @@ from huggingface_hub import HfApi
|
|
| 6 |
# ----------------------------------
|
| 7 |
TOKEN = os.environ.get("TOKEN", "") # A read/write token for your org
|
| 8 |
|
| 9 |
-
OWNER = "AIR-Bench" #
|
| 10 |
# ----------------------------------
|
| 11 |
|
| 12 |
REPO_ID = f"{OWNER}/leaderboard"
|
|
@@ -15,7 +15,7 @@ RESULTS_REPO = f"{OWNER}/eval_results"
|
|
| 15 |
# repo for submitting the evaluation
|
| 16 |
SEARCH_RESULTS_REPO = f"{OWNER}/search_results"
|
| 17 |
|
| 18 |
-
# If you
|
| 19 |
CACHE_PATH = os.getenv("HF_HOME", ".")
|
| 20 |
|
| 21 |
# Local caches
|
|
|
|
| 6 |
# ----------------------------------
|
| 7 |
TOKEN = os.environ.get("TOKEN", "") # A read/write token for your org
|
| 8 |
|
| 9 |
+
OWNER = "AIR-Bench" # Change to your org - don't forget to create a results and request dataset, with the correct format!
|
| 10 |
# ----------------------------------
|
| 11 |
|
| 12 |
REPO_ID = f"{OWNER}/leaderboard"
|
|
|
|
| 15 |
# repo for submitting the evaluation
|
| 16 |
SEARCH_RESULTS_REPO = f"{OWNER}/search_results"
|
| 17 |
|
| 18 |
+
# If you set up a cache later, just change HF_HOME
|
| 19 |
CACHE_PATH = os.getenv("HF_HOME", ".")
|
| 20 |
|
| 21 |
# Local caches
|
tests/src/test_envs.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from air_benchmark.tasks import BenchmarkTable
|
| 2 |
+
|
| 3 |
+
from src.envs import BENCHMARK_VERSION_LIST, DEFAULT_METRIC_QA, DEFAULT_METRIC_LONG_DOC, METRIC_LIST
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def test_benchmark_version_list():
    """Check that every leaderboard benchmark version exists in BenchmarkTable.

    Each entry of BENCHMARK_VERSION_LIST must appear among the keys of
    BenchmarkTable (imported from air_benchmark.tasks). On failure the
    assertion message lists the entries that were not found.
    """
    leaderboard_versions = frozenset(BENCHMARK_VERSION_LIST)
    # The keys view is already iterable; no intermediate list is needed.
    available_versions = frozenset(BenchmarkTable.keys())
    # Equivalent to issubset(), but keeps the offending entries for reporting.
    unknown_versions = leaderboard_versions - available_versions
    assert not unknown_versions, (
        f"Versions missing from BenchmarkTable: {sorted(unknown_versions, key=str)}"
    )
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def test_default_metrics():
    """Check that both default metrics are members of METRIC_LIST."""
    # QA default is checked first, then the long-doc default.
    for default_metric in (DEFAULT_METRIC_QA, DEFAULT_METRIC_LONG_DOC):
        assert default_metric in METRIC_LIST
|