"""Command-line helper that downloads a dataset snapshot from the Hugging Face Hub."""

import argparse
import os

from huggingface_hub import snapshot_download

# Configuration: defaults used when the caller / command line gives no override.
REPO_ID = "Demosthene-OR/rakuten-data"
LOCAL_DIR = "data/preprocessed"


def download_data(repo_id=REPO_ID, local_dir=LOCAL_DIR):
    """Download a dataset repository snapshot from the Hugging Face Hub.

    Args:
        repo_id: Identifier of the dataset repository on the Hub.
        local_dir: Directory passed to ``snapshot_download`` as the download
            target.

    A progress message is printed to stdout before the transfer starts and
    another one once ``snapshot_download`` has returned.
    """
    print(f"Downloading data from {repo_id} to {local_dir}...")

    download_options = {
        "repo_id": repo_id,
        "repo_type": "dataset",
        "local_dir": local_dir,
        # NOTE(review): presumably asks for real file copies instead of
        # symlinks into the cache; how this flag is treated depends on the
        # installed huggingface_hub version - confirm before changing it.
        "local_dir_use_symlinks": False,
    }
    snapshot_download(**download_options)

    print("Download complete.")


def _parse_cli_args():
    """Build the argument parser and return the parsed command-line options."""
    cli = argparse.ArgumentParser(
        description="Download rakuten dataset from Hugging Face Hub"
    )
    cli.add_argument(
        "--repo_id",
        default=REPO_ID,
        help="Hugging Face Dataset Repo ID",
    )
    cli.add_argument(
        "--local_dir",
        default=LOCAL_DIR,
        help="Local directory to download to",
    )
    return cli.parse_args()


if __name__ == "__main__":
    cli_args = _parse_cli_args()
    download_data(cli_args.repo_id, cli_args.local_dir)