File size: 677 Bytes
74e6b83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
"""Fetch the English split of BhashaBench-Krishi from HuggingFace.

Usage:
    python scripts/fetch_bbk.py
"""
from pathlib import Path
import shutil

from huggingface_hub import hf_hub_download

# Hugging Face dataset repository and the file inside it that gets fetched.
REPO = "bharatgenai/BhashaBench-Krishi"
SRC_FILE = "English/test-00000-of-00001.parquet"

# Local destination: <two directories above this file>/data/raw/bbk_english.parquet
DST_DIR = Path(__file__).resolve().parent.parent.joinpath("data", "raw")
DST_FILE = DST_DIR.joinpath("bbk_english.parquet")


def main() -> None:
    """Download the dataset file and place a copy at ``DST_FILE``.

    Creates ``DST_DIR`` if it does not exist, fetches ``SRC_FILE`` from the
    ``REPO`` dataset repository with ``hf_hub_download``, copies the returned
    local path to ``DST_FILE``, and prints a one-line confirmation.
    """
    # Make sure the destination directory exists before fetching anything.
    DST_DIR.mkdir(parents=True, exist_ok=True)

    cache_path = hf_hub_download(
        repo_id=REPO,
        filename=SRC_FILE,
        repo_type="dataset",
    )

    # Copy from the path returned by the hub client to the project location.
    shutil.copy(cache_path, DST_FILE)

    print(f"Downloaded {REPO}/{SRC_FILE} -> {DST_FILE}")


# Run the download only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()