Spaces:
Sleeping
Sleeping
File size: 677 Bytes
"""Fetch the English split of BhashaBench-Krishi from HuggingFace.

Usage:
    python scripts/fetch_bbk.py
"""
from pathlib import Path
import shutil
from huggingface_hub import hf_hub_download
# Dataset repository on the Hugging Face Hub that is queried by main().
REPO = "bharatgenai/BhashaBench-Krishi"

# Path of the parquet file inside that repository.
SRC_FILE = "English/test-00000-of-00001.parquet"

# Local destination: two directory levels above this file's resolved path,
# then data/raw/bbk_english.parquet.
DST_DIR = Path(__file__).resolve().parent.parent.joinpath("data", "raw")
DST_FILE = DST_DIR.joinpath("bbk_english.parquet")
def main() -> None:
    """Download the configured parquet file and copy it to ``DST_FILE``.

    Creates ``DST_DIR`` (including missing parents) if it does not exist,
    asks ``hf_hub_download`` for ``SRC_FILE`` from the ``REPO`` dataset
    repository, copies the returned path to ``DST_FILE``, and prints a
    one-line confirmation.
    """
    # Make sure the destination directory exists before fetching anything.
    DST_DIR.mkdir(parents=True, exist_ok=True)

    cache_path = hf_hub_download(
        repo_id=REPO,
        filename=SRC_FILE,
        repo_type="dataset",
    )

    # Copy the file returned by the hub client to the project data folder.
    shutil.copy(cache_path, DST_FILE)
    print(f"Downloaded {REPO}/{SRC_FILE} -> {DST_FILE}")


# Run the download only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|