Spaces:

Goyamproject
/

React_native_app

Sleeping

App Files Files Community

Charuka66 committed on Feb 13

Commit

5bbbbc0

verified ·

1 Parent(s): b730b20

Add script to split dataset into train and validation sets

Browse files

This script organizes the balanced seed dataset into the required YOLO folder structure (train and val).
Randomly shuffles and splits the dataset using an 80% Training / 20% Validation ratio.
Automatically routes both images and their corresponding .txt segmentation labels.
Includes a safety fallback to generate empty label files for background/healthy images, preventing YOLO training errors.

Files changed (1) hide show

split.py +59 -0

split.py ADDED Viewed

	@@ -0,0 +1,59 @@

import os
import shutil
import random
from glob import glob

# ================= CONFIGURATION =================
# Input folders (where the seed images and their label files are now)
SOURCE_IMAGES = r"C:\Users\charu\Desktop\My_Project\seed_images"
SOURCE_LABELS = r"C:\Users\charu\Desktop\My_Project\seed_labels"
# Output folder (where the training data will go)
DEST_DIR = r"C:\Users\charu\Desktop\My_Project\teacher_v2_dataset"
# Split ratio (80% train, 20% validation)
TRAIN_RATIO = 0.8
# Image extensions to pick up; compared case-insensitively so the result is
# the same on case-sensitive and case-insensitive filesystems.
IMAGE_EXTENSIONS = (".jpg", ".png")
# =================================================


def _find_images(source_images):
    """Return the sorted paths of the image files directly inside a folder."""
    candidates = glob(os.path.join(source_images, "*"))
    # Sorting makes the pre-shuffle order independent of directory listing
    # order, which is what makes a seeded shuffle reproducible.
    return sorted(
        path for path in candidates
        if os.path.splitext(path)[1].lower() in IMAGE_EXTENSIONS
    )


def _remove_if_present(path):
    """Delete *path*, doing nothing when it does not exist."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


def split_data(source_images=None, source_labels=None, dest_dir=None,
               train_ratio=None, seed=None):
    """Copy images and their label files into a train/val folder layout.

    The images are shuffled, the first ``train_ratio`` share goes to
    ``<dest_dir>/train`` and the rest to ``<dest_dir>/val``. Each split gets
    an ``images`` and a ``labels`` sub-folder. An image without a matching
    ``.txt`` label gets an empty label file, and the number of such images
    is reported. Source files are copied, never moved.

    Args:
        source_images: Folder holding the images. Defaults to SOURCE_IMAGES.
        source_labels: Folder holding the ``.txt`` labels. Defaults to
            SOURCE_LABELS.
        dest_dir: Output root folder. Defaults to DEST_DIR.
        train_ratio: Share of images sent to ``train``, between 0 and 1.
            Defaults to TRAIN_RATIO.
        seed: Optional seed for a reproducible shuffle. When ``None`` the
            module-level random generator is used, as before.

    Returns:
        A ``(train_count, val_count)`` tuple; ``(0, 0)`` when no images
        were found.

    Raises:
        ValueError: If ``train_ratio`` is outside the range 0..1.
    """
    source_images = SOURCE_IMAGES if source_images is None else source_images
    source_labels = SOURCE_LABELS if source_labels is None else source_labels
    dest_dir = DEST_DIR if dest_dir is None else dest_dir
    train_ratio = TRAIN_RATIO if train_ratio is None else train_ratio

    if not 0.0 <= train_ratio <= 1.0:
        raise ValueError(f"train_ratio must be between 0 and 1, got {train_ratio!r}")

    # 1. Get the list of all images
    images = _find_images(source_images)
    if not images:
        # A wrong or empty source folder should not be reported as success.
        print(f"⚠️ No images found in: {source_images}")
        return 0, 0

    if seed is None:
        random.shuffle(images)
    else:
        random.Random(seed).shuffle(images)

    total = len(images)
    train_count = int(total * train_ratio)
    print(f"📦 Organizing {total} images...")
    print(f"   - Training: {train_count}")
    print(f"   - Validation: {total - train_count}")

    # 2. Create folders
    for split in ('train', 'val'):
        os.makedirs(os.path.join(dest_dir, split, 'images'), exist_ok=True)
        os.makedirs(os.path.join(dest_dir, split, 'labels'), exist_ok=True)

    # 3. Decide the destination (train or val) of every image
    plan = []
    for i, img_path in enumerate(images):
        filename = os.path.basename(img_path)
        label_name = os.path.splitext(filename)[0] + ".txt"
        split = 'train' if i < train_count else 'val'
        plan.append((img_path, filename, label_name, split))

    # 4. Remove copies left in the opposite split by an earlier run, so an
    #    image never ends up in both train and val. This is a separate pass
    #    so it cannot delete anything written by step 5 (two images may
    #    share one label name, e.g. a.jpg and a.png).
    for _, filename, label_name, split in plan:
        other = 'val' if split == 'train' else 'train'
        _remove_if_present(os.path.join(dest_dir, other, 'images', filename))
        _remove_if_present(os.path.join(dest_dir, other, 'labels', label_name))

    # 5. Copy files
    missing_labels = 0
    for img_path, filename, label_name, split in plan:
        shutil.copy(img_path, os.path.join(dest_dir, split, 'images', filename))

        label_path = os.path.join(source_labels, label_name)
        label_dest = os.path.join(dest_dir, split, 'labels', label_name)
        if os.path.exists(label_path):
            shutil.copy(label_path, label_dest)
        else:
            # No label: write an empty file so the image is treated as a
            # background ("Healthy") sample, and count it for the report.
            with open(label_dest, 'w'):
                pass
            missing_labels += 1

    if missing_labels:
        print(f"⚠️ {missing_labels} image(s) had no label file; empty labels were created.")
    print(f"\n✅ Done! Dataset ready at: {dest_dir}")
    return train_count, total - train_count


if __name__ == "__main__":
    split_data()