File size: 927 Bytes
eb5ec73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import os
from huggingface_hub import snapshot_download
import argparse

# Configuration
# Default Hugging Face Hub repository ID; download_data requests it with
# repo_type="dataset". Also the default for the --repo_id CLI option.
REPO_ID = "Demosthene-OR/rakuten-data"
# Default destination directory (a relative path) passed as local_dir to the
# download. Also the default for the --local_dir CLI option.
LOCAL_DIR = "data/preprocessed"

def download_data(repo_id: str = REPO_ID, local_dir: str = LOCAL_DIR) -> str:
    """Download a dataset snapshot from the Hugging Face Hub.

    Prints a progress message before and after the download.

    Args:
        repo_id: ID of the dataset repository on the Hub. Defaults to
            ``REPO_ID``.
        local_dir: Directory the snapshot is downloaded into. Defaults to
            ``LOCAL_DIR``.

    Returns:
        The value returned by ``snapshot_download`` (documented by
        huggingface_hub as the local folder path of the snapshot), so callers
        can locate the downloaded files.

    Raises:
        Any exception raised by ``snapshot_download`` propagates unchanged;
        nothing is caught here.
    """
    print(f"Downloading data from {repo_id} to {local_dir}...")

    # NOTE(review): `local_dir_use_symlinks` appears to be deprecated in
    # recent huggingface_hub releases. It is kept so older versions still
    # write real files instead of symlinks into the cache -- confirm against
    # the pinned huggingface_hub version before removing it.
    snapshot_path = snapshot_download(
        repo_id=repo_id,
        repo_type="dataset",
        local_dir=local_dir,
        local_dir_use_symlinks=False,
    )
    print("Download complete.")
    # Previously the path was discarded; returning it is backward-compatible
    # for callers that ignore the result.
    return snapshot_path

if __name__ == "__main__":
    # Command-line entry point: each option falls back to the module-level
    # default when it is not supplied.
    cli = argparse.ArgumentParser(
        description="Download rakuten dataset from Hugging Face Hub"
    )
    option_table = (
        ("--repo_id", REPO_ID, "Hugging Face Dataset Repo ID"),
        ("--local_dir", LOCAL_DIR, "Local directory to download to"),
    )
    for flag, fallback, help_text in option_table:
        cli.add_argument(flag, default=fallback, help=help_text)

    options = cli.parse_args()
    download_data(options.repo_id, options.local_dir)