File size: 1,639 Bytes
5b86813
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#!/usr/bin/env python3
"""
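Create a validation split from a Roboflow-style "train" folder
by copying a fraction of images and their labels into "valid".

The selected files are copied, not moved, so they also remain in "train";
if "valid/images" already exists, the script leaves it untouched.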
"""
import random
import shutil
from pathlib import Path


def create_val_split(dataset: Path, val_fraction: float = 0.2, seed: int = 42) -> None:
    """Copy a random fraction of the training images (and labels) into ``valid``.

    Images are read from ``<dataset>/train/images`` and their labels from
    ``<dataset>/train/labels``; the selected files are copied to
    ``<dataset>/valid/images`` and ``<dataset>/valid/labels``.  Files are
    copied, not moved, so the selected images also remain in ``train``.

    If ``<dataset>/valid/images`` already exists, nothing is copied.

    Args:
        dataset: Root folder of the dataset (a ``Path`` or a path string).
        val_fraction: Fraction of the training images to copy, in ``[0, 1]``.
            The count is rounded down, so small datasets may yield 0 images.
        seed: Seed for the shuffle that picks the validation images.

    Raises:
        ValueError: If ``val_fraction`` is outside ``[0, 1]`` (or is NaN).
        SystemExit: If no training images are found.
    """
    # A negative fraction would silently slice "all but the last k" images,
    # and NaN fails both comparisons, so reject anything outside [0, 1].
    if not 0.0 <= val_fraction <= 1.0:
        raise ValueError(f"val_fraction must be between 0 and 1, got {val_fraction!r}")

    dataset = Path(dataset)
    train_images = dataset / "train" / "images"
    train_labels = dataset / "train" / "labels"
    valid_images = dataset / "valid" / "images"
    valid_labels = dataset / "valid" / "labels"

    if valid_images.exists():
        print(f"Validation folder already exists at {valid_images}; skipping.")
        return

    # Match extensions case-insensitively and skip anything that is not a
    # regular file (e.g. a directory that happens to be named "x.jpg").
    image_suffixes = {".jpg", ".jpeg", ".png"}
    imgs = []
    if train_images.is_dir():
        imgs = [
            entry
            for entry in train_images.iterdir()
            if entry.is_file() and entry.suffix.lower() in image_suffixes
        ]
    if not imgs:
        raise SystemExit(f"No training images found in {train_images}")

    # Directory listing order is filesystem-dependent; sort first so the same
    # seed always selects the same images.
    imgs.sort(key=lambda entry: entry.name)

    # Use a private generator so the caller's global random state is untouched.
    rng = random.Random(seed)
    rng.shuffle(imgs)
    k = int(len(imgs) * val_fraction)
    val_imgs = imgs[:k]

    valid_images.mkdir(parents=True, exist_ok=True)
    valid_labels.mkdir(parents=True, exist_ok=True)

    for img in val_imgs:
        shutil.copy2(img, valid_images / img.name)
        # Labels are optional: an image without a matching .txt is copied alone.
        label = train_labels / f"{img.stem}.txt"
        if label.exists():
            shutil.copy2(label, valid_labels / label.name)

    print(f"Created validation split with {len(val_imgs)} images at {valid_images}")


if __name__ == "__main__":
    # Command-line entry point: parse the options and build the split.
    import argparse

    parser = argparse.ArgumentParser(
        description='Copy a fraction of "train" images and labels into "valid".'
    )
    parser.add_argument("--dataset", default="dataset", help="path to dataset folder")
    parser.add_argument("--val", type=float, default=0.2, help="validation fraction")
    # Expose the shuffle seed so a different split can be drawn from the CLI;
    # the default matches create_val_split's own default.
    parser.add_argument("--seed", type=int, default=42, help="random seed for the split")
    args = parser.parse_args()

    create_val_split(Path(args.dataset), args.val, args.seed)