# Source: rakuten/src/data/download_data.py (927 Bytes)
# Last commit: eb5ec73 by Demosthene-OR - "Configure LFS for images and update code"
import os
from huggingface_hub import snapshot_download
import argparse
# Configuration: defaults shared by download_data() and the command-line
# entry point below.
# Hub repository ID used as the default ``repo_id``; it is fetched with
# repo_type="dataset".
REPO_ID = "Demosthene-OR/rakuten-data"
# Default destination directory for the download. This is a relative path.
LOCAL_DIR = "data/preprocessed"
def download_data(repo_id=REPO_ID, local_dir=LOCAL_DIR):
    """Download a dataset snapshot from the Hugging Face Hub.

    Args:
        repo_id: Hub repository ID of the dataset to fetch.
        local_dir: Directory the snapshot files are written to.

    Returns:
        The value returned by ``snapshot_download`` (the path of the
        downloaded snapshot folder), so callers can locate the files
        without re-deriving the destination.
    """
    print(f"Downloading data from {repo_id} to {local_dir}...")
    # NOTE(review): recent huggingface_hub releases appear to deprecate
    # ``local_dir_use_symlinks``; confirm against the pinned version before
    # removing it.
    snapshot_path = snapshot_download(
        repo_id=repo_id,
        repo_type="dataset",
        local_dir=local_dir,
        local_dir_use_symlinks=False,
    )
    print("Download complete.")
    return snapshot_path
if __name__ == "__main__":
    # Command-line entry point: both flags are optional and fall back to the
    # module-level defaults.
    cli = argparse.ArgumentParser(
        description="Download rakuten dataset from Hugging Face Hub",
    )
    cli.add_argument(
        "--repo_id",
        default=REPO_ID,
        help="Hugging Face Dataset Repo ID",
    )
    cli.add_argument(
        "--local_dir",
        default=LOCAL_DIR,
        help="Local directory to download to",
    )
    options = cli.parse_args()
    download_data(repo_id=options.repo_id, local_dir=options.local_dir)