File size: 677 Bytes
0dbbebb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
from datasets import load_dataset
from pathlib import Path

# Short names of the Genomic Benchmarks datasets to fetch. Each one is appended
# to the common Hub prefix used in the loop below to form the full repo id.
# NOTE: this list shares its name with the `datasets` package; that is harmless
# here because only `load_dataset` is imported from the package.
datasets = [
    "demo_human_or_worm",
    "dummy_mouse_enhancers_ensembl",
    "human_enhancers_ensembl",
    "human_nontata_promoters",
    "demo_coding_vs_intergenomic_seqs",
    "drosophila_enhancers_stark",
    "human_enhancers_cohn",
    "human_ensembl_regulatory",
    "human_ocr_ensembl",
]

# Root directory for the on-disk copies; created up front (no error if it
# already exists) so every dataset gets its own subdirectory beneath it.
out_root = Path("hf_raw")
out_root.mkdir(parents=True, exist_ok=True)

for short_name in datasets:
    repo_id = "katarinagresova/Genomic_Benchmarks_" + short_name
    destination = out_root / short_name
    # load_dataset() downloads into the HF cache; save_to_disk() then persists
    # a copy under out_root (optional, kept for local reuse).
    load_dataset(repo_id).save_to_disk(destination)
    print(f"downloaded {repo_id}")