"""Fetch the English split of BhashaBench-Krishi from HuggingFace. Usage: python scripts/fetch_bbk.py """ from pathlib import Path import shutil from huggingface_hub import hf_hub_download REPO = "bharatgenai/BhashaBench-Krishi" SRC_FILE = "English/test-00000-of-00001.parquet" DST_DIR = Path(__file__).resolve().parent.parent / "data" / "raw" DST_FILE = DST_DIR / "bbk_english.parquet" def main() -> None: DST_DIR.mkdir(parents=True, exist_ok=True) cached = hf_hub_download(repo_id=REPO, filename=SRC_FILE, repo_type="dataset") shutil.copy(cached, DST_FILE) print(f"Downloaded {REPO}/{SRC_FILE} -> {DST_FILE}") if __name__ == "__main__": main()