#!/usr/bin/env python3
"""
Create a validation split from a Roboflow-style "train" folder by copying
a fraction of images and their labels into "valid".
"""
import argparse
import random
import shutil
from pathlib import Path

# File suffixes (compared case-insensitively) that are treated as images.
_IMAGE_SUFFIXES = (".jpg", ".jpeg", ".png")


def create_val_split(dataset: Path, val_fraction: float = 0.2, seed: int = 42) -> None:
    """Copy a random fraction of the training images (and labels) into "valid".

    Images are read from ``<dataset>/train/images`` and their labels from
    ``<dataset>/train/labels`` (same stem, ``.txt`` suffix). The selected
    files are copied to ``<dataset>/valid/images`` and
    ``<dataset>/valid/labels``. Files are *copied*, not moved, so the
    selected images also remain in the training folder.

    If ``<dataset>/valid/images`` already exists, nothing is done.

    Args:
        dataset: Root folder of the dataset.
        val_fraction: Fraction of the training images to copy, in [0, 1].
            The count is truncated: ``int(number_of_images * val_fraction)``.
        seed: Seed for the shuffle; the same seed and the same set of file
            names always select the same images.

    Raises:
        ValueError: If ``val_fraction`` is not within [0, 1].
        SystemExit: If no training images are found.
    """
    # A negative fraction would turn ``imgs[:k]`` into "all but the last
    # few" images, so reject out-of-range values (including NaN) up front.
    if not 0.0 <= val_fraction <= 1.0:
        raise ValueError(
            f"val_fraction must be between 0 and 1, got {val_fraction!r}"
        )

    dataset = Path(dataset)
    train_images = dataset / "train" / "images"
    train_labels = dataset / "train" / "labels"
    valid_images = dataset / "valid" / "images"
    valid_labels = dataset / "valid" / "labels"

    if valid_images.exists():
        print(f"Validation folder already exists at {valid_images}; skipping.")
        return

    # Directory listing order depends on the filesystem; sorting first makes
    # the seeded shuffle reproducible across machines and runs.
    imgs = sorted(
        path
        for path in train_images.glob("*")
        if path.is_file() and path.suffix.lower() in _IMAGE_SUFFIXES
    )
    if not imgs:
        raise SystemExit(f"No training images found in {train_images}")

    # Use a private generator so the process-wide ``random`` state is left
    # untouched for any other code relying on it.
    random.Random(seed).shuffle(imgs)
    k = int(len(imgs) * val_fraction)
    val_imgs = imgs[:k]

    valid_images.mkdir(parents=True, exist_ok=True)
    valid_labels.mkdir(parents=True, exist_ok=True)

    for img in val_imgs:
        shutil.copy2(img, valid_images / img.name)
        label = train_labels / (img.stem + ".txt")
        # Images without a label file are still copied; only the label
        # copy is skipped.
        if label.exists():
            shutil.copy2(label, valid_labels / label.name)

    print(f"Created validation split with {len(val_imgs)} images at {valid_images}")


def main() -> None:
    """Parse command-line arguments and create the validation split."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--dataset", default="dataset", help="path to dataset folder")
    parser.add_argument("--val", type=float, default=0.2, help="validation fraction")
    parser.add_argument("--seed", type=int, default=42, help="random seed for the split")
    args = parser.parse_args()
    try:
        create_val_split(Path(args.dataset), args.val, args.seed)
    except ValueError as exc:
        # Report a bad --val as a usage error rather than a traceback.
        parser.error(str(exc))


if __name__ == "__main__":
    main()