arm-model / model /dataset_split.py
pragadeeshv23's picture
Upload folder using huggingface_hub
5b86813 verified
#!/usr/bin/env python3
"""
Create a validation split from a Roboflow-style "train" folder
by copying a fraction of images and their labels into "valid".
"""
import random
import shutil
from pathlib import Path
def create_val_split(dataset: Path, val_fraction: float = 0.2, seed: int = 42):
    """Copy a random fraction of the training set into a ``valid`` folder.

    Images (``*.jpg`` and ``*.png``) are read from ``dataset/train/images``.
    The selected ones are copied (not moved) to ``dataset/valid/images``, and
    for each selected image ``dataset/train/labels/<stem>.txt`` is copied to
    ``dataset/valid/labels`` when that file exists. If
    ``dataset/valid/images`` already exists, a notice is printed and nothing
    is changed.

    Args:
        dataset: Root folder that contains the ``train`` sub-folder.
        val_fraction: Share of training images to copy, in ``[0, 1]``.
            The resulting count is truncated toward zero.
        seed: Seed for the shuffle; the same set of file names and the same
            seed always produce the same selection.

    Raises:
        ValueError: If ``val_fraction`` is not within ``[0, 1]``.
        SystemExit: If no training images are found.
    """
    # Written as a negated chained comparison so that NaN is rejected too.
    if not (0.0 <= val_fraction <= 1.0):
        raise ValueError(
            f"val_fraction must be between 0 and 1, got {val_fraction!r}"
        )

    train_images = dataset / "train" / "images"
    train_labels = dataset / "train" / "labels"
    valid_images = dataset / "valid" / "images"
    valid_labels = dataset / "valid" / "labels"

    if valid_images.exists():
        print(f"Validation folder already exists at {valid_images}; skipping.")
        return

    # glob() yields entries in arbitrary, filesystem-dependent order; sort by
    # name so the seeded shuffle selects the same files on every machine.
    imgs = sorted(
        [*train_images.glob("*.jpg"), *train_images.glob("*.png")],
        key=lambda p: p.name,
    )
    if not imgs:
        raise SystemExit(f"No training images found in {train_images}")

    # A private generator keeps the process-wide random state untouched.
    random.Random(seed).shuffle(imgs)
    k = int(len(imgs) * val_fraction)
    val_imgs = imgs[:k]

    valid_images.mkdir(parents=True, exist_ok=True)
    valid_labels.mkdir(parents=True, exist_ok=True)

    for img in val_imgs:
        shutil.copy2(img, valid_images / img.name)
        label = train_labels / (img.stem + ".txt")
        # Images without a label file are copied on their own.
        if label.exists():
            shutil.copy2(label, valid_labels / label.name)

    print(f"Created validation split with {len(val_imgs)} images at {valid_images}")
if __name__ == "__main__":
    import argparse

    # Command-line entry point: each option maps onto one argument of
    # create_val_split().
    parser = argparse.ArgumentParser()
    parser.add_argument("--dataset", default="dataset", help="path to dataset folder")
    parser.add_argument("--val", type=float, default=0.2, help="validation fraction")
    # Expose the shuffle seed so a different split can be drawn without
    # editing the script; the default matches the previous fixed value.
    parser.add_argument("--seed", type=int, default=42, help="random seed for the split")
    args = parser.parse_args()
    create_val_split(Path(args.dataset), args.val, args.seed)