File size: 1,562 Bytes
562cbcf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
"""
Splits the YOLO dataset into 80% training and 20% validation sets.
"""

import random
import shutil
from pathlib import Path
import logging

logging.basicConfig(level=logging.INFO, format="%(message)s")


def split_dataset():
    yolo_base = Path("data/yolo_dataset")
    img_dir = yolo_base / "images"
    lbl_dir = yolo_base / "labels"

    # Create standard YOLO split directories
    for split in ['train', 'val']:
        (img_dir / split).mkdir(exist_ok=True)
        (lbl_dir / split).mkdir(exist_ok=True)

    # Get all images (ignoring the train/val folders we just made)
    images = [f for f in img_dir.glob("*.*") if f.is_file()]

    # Shuffle for randomness, then calculate the 80% split index
    random.seed(42)
    random.shuffle(images)
    split_idx = int(len(images) * 0.8)

    train_imgs = images[:split_idx]
    val_imgs = images[split_idx:]

    def move_files(file_list, split_name):
        for img_path in file_list:
            # Move Image
            shutil.move(str(img_path), str(img_dir / split_name / img_path.name))

            # Find and move corresponding label
            lbl_name = img_path.stem + ".txt"
            lbl_path = lbl_dir / lbl_name
            if lbl_path.exists():
                shutil.move(str(lbl_path), str(lbl_dir / split_name / lbl_name))

    logging.info(f"Splitting dataset: {len(train_imgs)} Train | {len(val_imgs)} Val")
    move_files(train_imgs, 'train')
    move_files(val_imgs, 'val')
    logging.info("✅ Dataset successfully split!")


if __name__ == "__main__":
    split_dataset()