Charuka66 committed on
Commit
77fe907
·
verified ·
1 Parent(s): b455adb

Upload split.py

Browse files

Split dataset for yolo26m.pt

Files changed (1) hide show
  1. split.py +25 -18
split.py CHANGED
@@ -4,7 +4,6 @@ import random
4
  from glob import glob
5
  from tqdm import tqdm
6
 
7
-
8
  SOURCE_ROOT = r"C:\Users\charu\Documents\goyam\roboflow\train"
9
  SOURCE_IMAGES = os.path.join(SOURCE_ROOT, "images")
10
  SOURCE_LABELS = os.path.join(SOURCE_ROOT, "labels")
@@ -12,41 +11,51 @@ SOURCE_LABELS = os.path.join(SOURCE_ROOT, "labels")
12
 
13
  DEST_DIR = r"C:\Users\charu\Documents\goyam\roboflow\final_split"
14
 
15
-
16
  TRAIN_RATIO = 0.8
17
 
18
 
19
  def split_dataset():
20
-
 
 
 
 
21
  for split in ['train', 'val']:
22
  os.makedirs(os.path.join(DEST_DIR, split, 'images'), exist_ok=True)
23
  os.makedirs(os.path.join(DEST_DIR, split, 'labels'), exist_ok=True)
24
 
25
 
26
  print(f"🔍 Scanning images in: {SOURCE_IMAGES}")
 
27
 
28
- exts = ['*.jpg', '*.jpeg', '*.png', '*.JPG']
29
- all_images = []
 
 
30
  for ext in exts:
31
- all_images.extend(glob(os.path.join(SOURCE_IMAGES, ext)))
32
-
 
 
 
 
 
33
  random.shuffle(all_images)
34
 
35
  total_count = len(all_images)
36
  train_count = int(total_count * TRAIN_RATIO)
37
 
38
  if total_count == 0:
39
- print("Error: No images found! Check your SOURCE_ROOT path.")
40
  return
41
 
42
- print(f"Found {total_count} images.")
43
  print(f" -> Training: {train_count}")
44
  print(f" -> Validation: {total_count - train_count}")
45
 
46
-
47
- print("📦 Organizing files...")
48
  for i, img_path in enumerate(tqdm(all_images)):
49
-
50
  split = 'train' if i < train_count else 'val'
51
 
52
  filename = os.path.basename(img_path)
@@ -54,19 +63,17 @@ def split_dataset():
54
 
55
  dest_img_path = os.path.join(DEST_DIR, split, 'images', filename)
56
 
 
57
  src_txt_path = os.path.join(SOURCE_LABELS, name_no_ext + ".txt")
58
  dest_txt_path = os.path.join(DEST_DIR, split, 'labels', name_no_ext + ".txt")
59
-
60
  shutil.copy(img_path, dest_img_path)
61
-
62
  if os.path.exists(src_txt_path):
63
  shutil.copy(src_txt_path, dest_txt_path)
64
- else:
65
- print(f"Missing label for {filename} (It might be a background image)")
66
 
67
- print(f"\n Done! Your dataset is ready at:")
68
  print(f" {DEST_DIR}")
69
- print("\nNEXT STEP: Update your data.yaml to point to this new folder!")
70
 
71
  if __name__ == "__main__":
72
  split_dataset()
 
4
  from glob import glob
5
  from tqdm import tqdm
6
 
 
7
  SOURCE_ROOT = r"C:\Users\charu\Documents\goyam\roboflow\train"
8
  SOURCE_IMAGES = os.path.join(SOURCE_ROOT, "images")
9
  SOURCE_LABELS = os.path.join(SOURCE_ROOT, "labels")
 
11
 
12
  DEST_DIR = r"C:\Users\charu\Documents\goyam\roboflow\final_split"
13
 
 
14
  TRAIN_RATIO = 0.8
15
 
16
 
17
  def split_dataset():
18
+
19
+ if os.path.exists(DEST_DIR):
20
+ print(f"⚠️ Warning: Destination folder already exists: {DEST_DIR}")
21
+ print(" (Ideally, delete it before running this to avoid mixing old data!)")
22
+
23
  for split in ['train', 'val']:
24
  os.makedirs(os.path.join(DEST_DIR, split, 'images'), exist_ok=True)
25
  os.makedirs(os.path.join(DEST_DIR, split, 'labels'), exist_ok=True)
26
 
27
 
28
  print(f"🔍 Scanning images in: {SOURCE_IMAGES}")
29
+
30
 
31
+ unique_images = set()
32
+
33
+ # Check all extensions
34
+ exts = ['*.jpg', '*.jpeg', '*.png', '*.JPG', '*.JPEG', '*.PNG']
35
  for ext in exts:
36
+ files = glob(os.path.join(SOURCE_IMAGES, ext))
37
+ for f in files:
38
+ unique_images.add(f)
39
+
40
+ all_images = list(unique_images)
41
+
42
+
43
  random.shuffle(all_images)
44
 
45
  total_count = len(all_images)
46
  train_count = int(total_count * TRAIN_RATIO)
47
 
48
  if total_count == 0:
49
+ print(" Error: No images found!")
50
  return
51
 
52
+ print(f"Found {total_count} unique images.")
53
  print(f" -> Training: {train_count}")
54
  print(f" -> Validation: {total_count - train_count}")
55
 
56
+ # 3. Copy Files
57
+ print("Organizing files...")
58
  for i, img_path in enumerate(tqdm(all_images)):
 
59
  split = 'train' if i < train_count else 'val'
60
 
61
  filename = os.path.basename(img_path)
 
63
 
64
  dest_img_path = os.path.join(DEST_DIR, split, 'images', filename)
65
 
66
+ # Check label (Look for .txt)
67
  src_txt_path = os.path.join(SOURCE_LABELS, name_no_ext + ".txt")
68
  dest_txt_path = os.path.join(DEST_DIR, split, 'labels', name_no_ext + ".txt")
69
+
70
  shutil.copy(img_path, dest_img_path)
71
+
72
  if os.path.exists(src_txt_path):
73
  shutil.copy(src_txt_path, dest_txt_path)
 
 
74
 
75
+ print(f"\nDone! Your dataset is ready at:")
76
  print(f" {DEST_DIR}")
 
77
 
78
  if __name__ == "__main__":
79
  split_dataset()