amul-ai-eval / scripts /fetch_bbk.py
bpHigh's picture
HF Space: add charts tab
74e6b83
Raw
History Blame Contribute Delete
677 Bytes
"""Fetch the English split of BhashaBench-Krishi from HuggingFace.
Usage:
python scripts/fetch_bbk.py
"""
from pathlib import Path
import shutil
from huggingface_hub import hf_hub_download
# Dataset repository on the Hub and the file within it to fetch.
REPO = "bharatgenai/BhashaBench-Krishi"
SRC_FILE = "English/test-00000-of-00001.parquet"

# Destination: <two directories above this file>/data/raw/bbk_english.parquet
DST_DIR = Path(__file__).resolve().parent.parent.joinpath("data", "raw")
DST_FILE = DST_DIR.joinpath("bbk_english.parquet")
def main() -> None:
    """Download ``SRC_FILE`` from ``REPO`` and copy it to ``DST_FILE``.

    The destination directory is created first (including parents) if it
    does not already exist. A confirmation line is printed once the copy
    has completed.
    """
    # Make sure the target directory exists before anything is written.
    DST_DIR.mkdir(parents=True, exist_ok=True)

    # hf_hub_download returns a local path, which is then copied to the
    # project-local destination.
    hub_path = hf_hub_download(
        repo_id=REPO,
        filename=SRC_FILE,
        repo_type="dataset",
    )
    shutil.copy(hub_path, DST_FILE)

    print(f"Downloaded {REPO}/{SRC_FILE} -> {DST_FILE}")
# Run the fetch only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()