"""Download the Genomic Benchmarks datasets from the Hugging Face Hub.

Each dataset named in ``datasets`` is loaded from the
``katarinagresova/Genomic_Benchmarks_<name>`` repository and a copy is
written to ``hf_raw/<name>`` so it can be reused without going through the
Hub again.
"""

from datasets import load_dataset
from pathlib import Path

# Suffixes of the Hub repositories to fetch; each is appended to the
# "katarinagresova/Genomic_Benchmarks_" prefix below.
datasets = [
    "demo_human_or_worm",
    "dummy_mouse_enhancers_ensembl",
    "human_enhancers_ensembl",
    "human_nontata_promoters",
    "demo_coding_vs_intergenomic_seqs",
    "drosophila_enhancers_stark",
    "human_enhancers_cohn",
    "human_ensembl_regulatory",
    "human_ocr_ensembl",
]

# Local directory that receives one sub-directory per dataset.
out_root = Path("hf_raw")
out_root.mkdir(parents=True, exist_ok=True)

for name in datasets:
    hf_id = f"katarinagresova/Genomic_Benchmarks_{name}"
    ds = load_dataset(hf_id)  # downloads to the HF cache
    ds.save_to_disk(out_root / name)  # optional: persist locally for reuse
    print(f"downloaded {hf_id}")