"""
Create a validation split from a Roboflow-style "train" folder
by copying a fraction of images and their labels into "valid".
"""
import argparse
import random
import shutil
from pathlib import Path


def create_val_split(dataset: Path, val_fraction: float = 0.2, seed: int = 42) -> None:
    """Copy a random fraction of the training images and labels into ``valid``.

    Images are read from ``<dataset>/train/images`` and copied to
    ``<dataset>/valid/images``. For each selected image, a label file with
    the same stem and a ``.txt`` suffix is copied from
    ``<dataset>/train/labels`` to ``<dataset>/valid/labels`` when it exists.
    Files are copied, not moved, so the selected images also stay in ``train``.

    If ``<dataset>/valid/images`` already exists, nothing is copied and the
    function returns after printing a notice.

    Args:
        dataset: Root folder of the dataset (a ``str`` path is accepted too).
        val_fraction: Fraction of the training images to copy, in ``[0, 1]``.
            The count is truncated toward zero, so small datasets may yield
            an empty split.
        seed: Seed for the selection; the same seed and the same file names
            give the same split.

    Raises:
        ValueError: If ``val_fraction`` is outside ``[0, 1]``.
        SystemExit: If no training images are found.
    """
    # A negative fraction would turn into a negative slice bound below and
    # silently select almost the whole dataset, so reject it up front.
    if not 0.0 <= val_fraction <= 1.0:
        raise ValueError(f"val_fraction must be between 0 and 1, got {val_fraction}")

    dataset = Path(dataset)
    train_images = dataset / "train" / "images"
    train_labels = dataset / "train" / "labels"
    valid_images = dataset / "valid" / "images"
    valid_labels = dataset / "valid" / "labels"

    if valid_images.exists():
        print(f"Validation folder already exists at {valid_images}; skipping.")
        return

    # Match extensions case-insensitively and skip anything that is not a
    # regular file. Sorting by name makes the seeded shuffle independent of
    # the order in which the filesystem lists directory entries.
    image_suffixes = {".jpg", ".jpeg", ".png"}
    imgs = []
    if train_images.is_dir():
        imgs = sorted(
            (
                p
                for p in train_images.iterdir()
                if p.is_file() and p.suffix.lower() in image_suffixes
            ),
            key=lambda p: p.name,
        )
    if not imgs:
        raise SystemExit(f"No training images found in {train_images}")

    # A private generator keeps the caller's global random state untouched.
    random.Random(seed).shuffle(imgs)
    k = int(len(imgs) * val_fraction)
    val_imgs = imgs[:k]

    valid_images.mkdir(parents=True, exist_ok=True)
    valid_labels.mkdir(parents=True, exist_ok=True)

    for img in val_imgs:
        shutil.copy2(img, valid_images / img.name)
        label = train_labels / (img.stem + ".txt")
        # Images without a label file are copied without one.
        if label.exists():
            shutil.copy2(label, valid_labels / label.name)

    print(f"Created validation split with {len(val_imgs)} images at {valid_images}")


if __name__ == "__main__":
    # Command-line entry point: parse the options and build the split once.
    parser = argparse.ArgumentParser(
        description='Copy a fraction of "train" images and labels into "valid".'
    )
    parser.add_argument("--dataset", default="dataset", help="path to dataset folder")
    parser.add_argument("--val", type=float, default=0.2, help="validation fraction")
    parser.add_argument(
        "--seed",
        type=int,
        default=42,
        help="random seed used to choose the validation images",
    )
    args = parser.parse_args()

    # A fraction outside [0, 1] cannot describe a subset of the training set;
    # report it as a usage error instead of producing a surprising split.
    if not 0.0 <= args.val <= 1.0:
        parser.error(f"--val must be between 0 and 1, got {args.val}")

    create_val_split(Path(args.dataset), args.val, seed=args.seed)