# File size: 475 Bytes
# d6e97b5
import os, shutil
from datasets import load_dataset
# Export the first 1000 rows of the "coco_captions" train split as JPEG files
# under a fixed output directory.
out = "/home/ubuntu/data/coco/train2017"
# exist_ok=True: do not fail when the directory is already there (re-runs).
os.makedirs(out, exist_ok=True)

# small slice of train split
ds = load_dataset("coco_captions", "2017", split="train[:1000]")
print("Downloading ~1000 images...")
for i, row in enumerate(ds):
    # Per the original author's note, row['image'] is a PIL image
    # (HF auto-downloads the actual JPEGs).
    # NOTE(review): files are named by row index zero-padded to 12 digits;
    # these are presumably NOT the COCO image ids -- confirm that nothing
    # downstream matches files against annotation ids.
    fn = os.path.join(out, f"{i:012d}.jpg")
    img = row["image"]
    # The JPEG writer rejects alpha/palette modes; convert only those so a
    # single odd image does not abort the whole export. Other modes are
    # saved exactly as before.
    if img.mode in ("RGBA", "LA", "P"):
        img = img.convert("RGB")
    img.save(fn, quality=90)
print("✅ Wrote images to:", out)
|