Spaces:
Paused
Paused
| import os | |
| from huggingface_hub import snapshot_download | |
| import argparse | |
# Default download settings; both can be overridden per call or from the CLI.
# Hugging Face Hub dataset repository to fetch.
REPO_ID: str = "Demosthene-OR/rakuten-data"
# Directory (relative to the working directory) that receives the files.
LOCAL_DIR: str = "data/preprocessed"
def download_data(repo_id: str = REPO_ID, local_dir: str = LOCAL_DIR) -> str:
    """Download a dataset repository from the Hugging Face Hub.

    Progress messages are printed to stdout before and after the download.

    Args:
        repo_id: Hub repository identifier of the dataset to fetch.
        local_dir: Directory the repository files are written to.

    Returns:
        The folder path reported by ``snapshot_download``, so callers can
        locate the downloaded files without re-deriving the path.
    """
    print(f"Downloading data from {repo_id} to {local_dir}...")
    snapshot_path = snapshot_download(
        repo_id=repo_id,
        repo_type="dataset",
        local_dir=local_dir,
        # Ask for real files in local_dir rather than symlinks into the cache.
        # NOTE(review): recent huggingface_hub releases deprecate this
        # argument -- confirm against the version pinned for this project.
        local_dir_use_symlinks=False,
    )
    print("Download complete.")
    return snapshot_path
if __name__ == "__main__":
    # Command-line entry point: each option falls back to the module default.
    cli = argparse.ArgumentParser(
        description="Download rakuten dataset from Hugging Face Hub",
    )
    option_specs = (
        ("--repo_id", REPO_ID, "Hugging Face Dataset Repo ID"),
        ("--local_dir", LOCAL_DIR, "Local directory to download to"),
    )
    for flag, fallback, help_text in option_specs:
        cli.add_argument(flag, default=fallback, help=help_text)
    options = cli.parse_args()
    download_data(options.repo_id, options.local_dir)