"""Export a small slice of a COCO captions train split as JPEG files.

Loads the first 1000 rows of the ``train`` split of the ``coco_captions``
dataset (config ``"2017"``) through Hugging Face ``datasets`` and writes each
row's ``image`` field to ``out`` as ``<index>.jpg``.

Note: files are named after the zero-padded enumeration index of the row
(``000000000000.jpg``, ``000000000001.jpg``, ...), not after any image id
stored in the dataset.
"""
import os
import shutil  # not used in the visible script; kept in case other code relies on it

from datasets import load_dataset

# Destination directory for the exported images.
out = "/home/ubuntu/data/coco/train2017"
os.makedirs(out, exist_ok=True)

# Image modes Pillow's JPEG encoder can write directly; anything else
# (e.g. RGBA, P, LA) makes ``Image.save`` raise for a ``.jpg`` target.
_JPEG_SAFE_MODES = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")

# small slice of train split
ds = load_dataset("coco_captions", "2017", split="train[:1000]")

print("Downloading ~1000 images...")
for i, row in enumerate(ds):
    # row['image'] is expected to be a PIL image (per the original author's
    # note, HF fetches the actual JPEGs) -- NOTE(review): confirm the schema.
    image = row["image"]
    if image.mode not in _JPEG_SAFE_MODES:
        # Drop alpha/palette so the JPEG writer does not fail on this row;
        # images already in a JPEG-compatible mode are written unchanged.
        image = image.convert("RGB")
    fn = os.path.join(out, f"{i:012d}.jpg")
    image.save(fn, quality=90)

print("✅ Wrote images to:", out)