| import argparse | |
| from huggingface_hub import snapshot_download | |
"""
Example usage (quote the pattern so the shell passes it through unexpanded):

    python3 scripts/download_hf_data.py --repo_id HuggingFaceFW/fineweb --local_dir ./fineweb/ --allow_patterns "sample/10BT/*"
"""
def build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the dataset download script.

    Options:
        --repo_id: repository identifier passed to ``snapshot_download``
            (default ``"HuggingFaceFW/fineweb"``).
        --local_dir: directory passed as ``local_dir``
            (default ``"./fineweb/"``).
        --allow_patterns: one or more patterns passed as ``allow_patterns``;
            when the flag is omitted, ``None`` is forwarded.
    """
    parser = argparse.ArgumentParser(
        description="Download a dataset repository snapshot from the Hugging Face Hub.",
    )
    parser.add_argument(
        "--repo_id",
        type=str,
        default="HuggingFaceFW/fineweb",
        help="dataset repository id (default: %(default)s)",
    )
    parser.add_argument(
        "--local_dir",
        type=str,
        default="./fineweb/",
        help="directory the files are placed in (default: %(default)s)",
    )
    # nargs="+" keeps the single-pattern invocation working (it becomes a
    # one-element list) while also allowing several patterns after one flag.
    parser.add_argument(
        "--allow_patterns",
        type=str,
        nargs="+",
        default=None,
        help="one or more patterns selecting which files to download; "
        "quote them so the shell does not expand them",
    )
    return parser


def main(argv=None) -> str:
    """Parse the command line and download the requested snapshot.

    Args:
        argv: list of argument strings to parse. ``None`` (the default) lets
            argparse read ``sys.argv[1:]``.

    Returns:
        The value returned by ``snapshot_download`` (the folder path of the
        downloaded snapshot, per the huggingface_hub documentation).
    """
    args = build_parser().parse_args(argv)
    return snapshot_download(
        args.repo_id,
        repo_type="dataset",
        local_dir=args.local_dir,
        allow_patterns=args.allow_patterns,
    )


if __name__ == "__main__":
    # Report the result instead of discarding it, so the user can see where
    # the snapshot ended up.
    print(f"Downloaded snapshot to: {main()}")