"""Download the Genomic Benchmarks datasets from the Hugging Face Hub.

Every entry in ``datasets`` is loaded from the repository
``katarinagresova/Genomic_Benchmarks_<name>`` and a copy is saved under
``hf_raw/<name>``.  Run this file as a script to perform the downloads.
"""
from pathlib import Path

from datasets import load_dataset

# Repository-name suffixes; each is appended to
# "katarinagresova/Genomic_Benchmarks_" to form the Hub dataset id.
datasets = [
    "demo_human_or_worm",
    "dummy_mouse_enhancers_ensembl",
    "human_enhancers_ensembl",
    "human_nontata_promoters",
    "demo_coding_vs_intergenomic_seqs",
    "drosophila_enhancers_stark",
    "human_enhancers_cohn",
    "human_ensembl_regulatory",
    "human_ocr_ensembl",
]

# Directory (relative to the current working directory) that receives one
# sub-directory per dataset.
out_root = Path("hf_raw")


def main() -> None:
    """Fetch every dataset in ``datasets`` and save it under ``out_root``.

    The output directory is created if it does not exist yet.  Datasets are
    processed in list order and one progress line is printed per dataset.
    Any exception raised by ``load_dataset`` or ``save_to_disk`` propagates
    and stops the run, so later datasets are not attempted.
    """
    out_root.mkdir(parents=True, exist_ok=True)
    for name in datasets:
        hf_id = f"katarinagresova/Genomic_Benchmarks_{name}"
        ds = load_dataset(hf_id)  # downloads to the HF cache
        ds.save_to_disk(out_root / name)  # optional: persist locally for reuse
        print(f"downloaded {hf_id}")


if __name__ == "__main__":
    main()