# Spaces:
# Sleeping
# Sleeping
"""Fetch the English split of BhashaBench-Krishi from HuggingFace.

Usage:
    python scripts/fetch_bbk.py
"""
import shutil
from pathlib import Path

from huggingface_hub import hf_hub_download
# Hugging Face dataset repository and the parquet file (English split) to fetch.
REPO = "bharatgenai/BhashaBench-Krishi"
SRC_FILE = "English/test-00000-of-00001.parquet"

# Destination: <parent of this script's directory>/data/raw/bbk_english.parquet.
DST_DIR = Path(__file__).resolve().parent.parent / "data" / "raw"
DST_FILE = DST_DIR / "bbk_english.parquet"
def main() -> None:
    """Download the English parquet file and copy it to ``DST_FILE``.

    Creates ``DST_DIR`` (and any missing parents) if it does not exist,
    fetches ``SRC_FILE`` from the ``REPO`` dataset repository with
    ``hf_hub_download``, copies the returned path to ``DST_FILE``, and
    prints a one-line confirmation.
    """
    DST_DIR.mkdir(parents=True, exist_ok=True)
    # hf_hub_download hands back a local path to the fetched file; the copy
    # places it at a stable location inside the project's data directory.
    cached = hf_hub_download(repo_id=REPO, filename=SRC_FILE, repo_type="dataset")
    shutil.copy(cached, DST_FILE)
    print(f"Downloaded {REPO}/{SRC_FILE} -> {DST_FILE}")
# Run the download only when executed as a script, not when imported.
if __name__ == "__main__":
    main()