Spaces:

Goyamproject
/

React_native_app

Sleeping

App Files Community

Charuka66 committed on 24 days ago

Commit

77fe907

verified ·

1 Parent(s): b455adb

Upload split.py

Browse files

Split dataset for yolo26m.pt

Files changed (1) hide show

split.py +25 -18

split.py CHANGED Viewed

@@ -4,7 +4,6 @@ import random
 from glob import glob
 from tqdm import tqdm
 SOURCE_ROOT = r"C:\Users\charu\Documents\goyam\roboflow\train"
 SOURCE_IMAGES = os.path.join(SOURCE_ROOT, "images")
 SOURCE_LABELS = os.path.join(SOURCE_ROOT, "labels")
@@ -12,41 +11,51 @@ SOURCE_LABELS = os.path.join(SOURCE_ROOT, "labels")
 DEST_DIR = r"C:\Users\charu\Documents\goyam\roboflow\final_split"
 TRAIN_RATIO = 0.8
 def split_dataset():
     for split in ['train', 'val']:
         os.makedirs(os.path.join(DEST_DIR, split, 'images'), exist_ok=True)
         os.makedirs(os.path.join(DEST_DIR, split, 'labels'), exist_ok=True)
     print(f"🔍 Scanning images in: {SOURCE_IMAGES}")
-    exts = ['*.jpg', '*.jpeg', '*.png', '*.JPG']
-    all_images = []
     for ext in exts:
-        all_images.extend(glob(os.path.join(SOURCE_IMAGES, ext)))
     random.shuffle(all_images)
     total_count = len(all_images)
     train_count = int(total_count * TRAIN_RATIO)
     if total_count == 0:
-        print("Error: No images found! Check your SOURCE_ROOT path.")
         return
-    print(f"Found {total_count} images.")
     print(f"   -> Training: {train_count}")
     print(f"   -> Validation: {total_count - train_count}")
-    print("📦 Organizing files...")
     for i, img_path in enumerate(tqdm(all_images)):
         split = 'train' if i < train_count else 'val'
         filename = os.path.basename(img_path)
@@ -54,19 +63,17 @@ def split_dataset():
         dest_img_path = os.path.join(DEST_DIR, split, 'images', filename)
         src_txt_path = os.path.join(SOURCE_LABELS, name_no_ext + ".txt")
         dest_txt_path = os.path.join(DEST_DIR, split, 'labels', name_no_ext + ".txt")
         shutil.copy(img_path, dest_img_path)
         if os.path.exists(src_txt_path):
             shutil.copy(src_txt_path, dest_txt_path)
-        else:
-            print(f"Missing label for {filename} (It might be a background image)")
-    print(f"\n Done! Your dataset is ready at:")
     print(f"   {DEST_DIR}")
-    print("\nNEXT STEP: Update your data.yaml to point to this new folder!")
 if __name__ == "__main__":
     split_dataset()

 from glob import glob
 from tqdm import tqdm
 SOURCE_ROOT = r"C:\Users\charu\Documents\goyam\roboflow\train"
 SOURCE_IMAGES = os.path.join(SOURCE_ROOT, "images")
 SOURCE_LABELS = os.path.join(SOURCE_ROOT, "labels")
 DEST_DIR = r"C:\Users\charu\Documents\goyam\roboflow\final_split"
 TRAIN_RATIO = 0.8
 def split_dataset():
+    if os.path.exists(DEST_DIR):
+        print(f"⚠️ Warning: Destination folder already exists: {DEST_DIR}")
+        print("   (Ideally, delete it before running this to avoid mixing old data!)")
     for split in ['train', 'val']:
         os.makedirs(os.path.join(DEST_DIR, split, 'images'), exist_ok=True)
         os.makedirs(os.path.join(DEST_DIR, split, 'labels'), exist_ok=True)
     print(f"🔍 Scanning images in: {SOURCE_IMAGES}")
+    unique_images = set()
+    # Check all extensions
+    exts = ['*.jpg', '*.jpeg', '*.png', '*.JPG', '*.JPEG', '*.PNG']
     for ext in exts:
+        files = glob(os.path.join(SOURCE_IMAGES, ext))
+        for f in files:
+            unique_images.add(f)
+    all_images = list(unique_images)
     random.shuffle(all_images)
     total_count = len(all_images)
     train_count = int(total_count * TRAIN_RATIO)
     if total_count == 0:
+        print(" Error: No images found!")
         return
+    print(f"Found {total_count} unique images.")
     print(f"   -> Training: {train_count}")
     print(f"   -> Validation: {total_count - train_count}")
+    # 3. Copy Files
+    print("Organizing files...")
     for i, img_path in enumerate(tqdm(all_images)):
         split = 'train' if i < train_count else 'val'
         filename = os.path.basename(img_path)
         dest_img_path = os.path.join(DEST_DIR, split, 'images', filename)
+        # Check label (Look for .txt)
         src_txt_path = os.path.join(SOURCE_LABELS, name_no_ext + ".txt")
         dest_txt_path = os.path.join(DEST_DIR, split, 'labels', name_no_ext + ".txt")
         shutil.copy(img_path, dest_img_path)
         if os.path.exists(src_txt_path):
             shutil.copy(src_txt_path, dest_txt_path)
+    print(f"\nDone! Your dataset is ready at:")
     print(f"   {DEST_DIR}")
 if __name__ == "__main__":
     split_dataset()