Spaces:
Sleeping
Sleeping
| """ | |
| Splits the YOLO dataset into 80% training and 20% validation sets. | |
| """ | |
| import random | |
| import shutil | |
| from pathlib import Path | |
| import logging | |
# Configure the root logger once at import: INFO and above, bare message text only.
logging.basicConfig(
    format="%(message)s",
    level=logging.INFO,
)
def split_dataset(yolo_base=Path("data/yolo_dataset"), train_ratio=0.8, seed=42):
    """Split a flat YOLO dataset into ``train`` and ``val`` subfolders.

    Image files located directly under ``<yolo_base>/images`` are shuffled with
    a seeded RNG and moved into ``images/train`` or ``images/val``. For every
    image, a label file with the same stem and a ``.txt`` suffix is moved from
    ``<yolo_base>/labels`` into the matching ``labels/<split>`` folder when it
    exists; images without a label file are still moved.

    Args:
        yolo_base: Dataset root containing the ``images`` and ``labels``
            folders. Accepts a ``Path`` or a string.
        train_ratio: Fraction of images assigned to the training split; the
            remainder goes to validation. Must lie in ``[0, 1]``.
        seed: Seed for the shuffle, so the same set of file names always
            produces the same split.

    Raises:
        ValueError: If ``train_ratio`` is outside ``[0, 1]``.
    """
    if not 0.0 <= train_ratio <= 1.0:
        raise ValueError(f"train_ratio must be between 0 and 1, got {train_ratio!r}")

    # Common image file extensions (compared case-insensitively). Anything else
    # in the images folder (.DS_Store, Thumbs.db, classes.txt, ...) is left alone.
    image_suffixes = {".bmp", ".jpeg", ".jpg", ".png", ".tif", ".tiff", ".webp"}

    yolo_base = Path(yolo_base)
    img_dir = yolo_base / "images"
    lbl_dir = yolo_base / "labels"

    # Create standard YOLO split directories. parents=True keeps a missing
    # images/ or labels/ folder from aborting the run with FileNotFoundError.
    for split in ("train", "val"):
        (img_dir / split).mkdir(parents=True, exist_ok=True)
        (lbl_dir / split).mkdir(parents=True, exist_ok=True)

    # Only regular files directly inside images/ are candidates; is_file()
    # excludes the train/val folders created above.
    files = [f for f in img_dir.iterdir() if f.is_file()]

    # Directory listing order is filesystem-dependent, so sort by name before
    # shuffling; otherwise the fixed seed would not give a reproducible split.
    images = sorted(
        (f for f in files if f.suffix.lower() in image_suffixes),
        key=lambda p: p.name,
    )

    skipped = len(files) - len(images)
    if skipped:
        logging.warning("Ignoring %d non-image file(s) in %s", skipped, img_dir)

    if not images:
        logging.warning("No images found in %s; nothing to split.", img_dir)
        return

    # A private Random instance avoids reseeding the process-wide RNG.
    random.Random(seed).shuffle(images)
    split_idx = int(len(images) * train_ratio)
    train_imgs = images[:split_idx]
    val_imgs = images[split_idx:]

    def move_files(file_list, split_name):
        """Move each image, and its label file if one exists, into *split_name*."""
        for img_path in file_list:
            shutil.move(str(img_path), str(img_dir / split_name / img_path.name))

            # The label shares the image's stem; an image with no label file
            # is moved on its own.
            lbl_name = img_path.stem + ".txt"
            lbl_path = lbl_dir / lbl_name
            if lbl_path.exists():
                shutil.move(str(lbl_path), str(lbl_dir / split_name / lbl_name))

    logging.info(
        "Splitting dataset: %d Train | %d Val", len(train_imgs), len(val_imgs)
    )
    move_files(train_imgs, "train")
    move_files(val_imgs, "val")
    logging.info("✅ Dataset successfully split!")
if __name__ == "__main__":
    # Executed as a script (not imported): perform the split.
    split_dataset()