# File size: 2,411 bytes (web-viewer header, blame hashes and line-number gutter removed).
import os
import shutil
import random
from glob import glob
from tqdm import tqdm
# Root folder of the dataset to be split; the images and labels are read from
# its "images" and "labels" sub-folders.
SOURCE_ROOT = r"C:\Users\charu\Documents\goyam\roboflow\train"
SOURCE_IMAGES = os.path.join(SOURCE_ROOT, "images")
SOURCE_LABELS = os.path.join(SOURCE_ROOT, "labels")
# Folder that receives the train/ and val/ sub-trees.
DEST_DIR = r"C:\Users\charu\Documents\goyam\roboflow\final_split"
# Fraction of the images assigned to the training split; the remainder goes
# to validation.
TRAIN_RATIO = 0.8
def _list_images(images_dir):
    """Return the sorted paths of the image files directly inside *images_dir*."""
    if not os.path.isdir(images_dir):
        return []
    image_suffixes = (".jpg", ".jpeg", ".png")
    found = []
    for name in os.listdir(images_dir):
        # Dot-files are skipped on purpose: the glob patterns this replaces
        # never matched them either.
        if name.startswith("."):
            continue
        # Compare the suffix case-insensitively so ".Jpg" style names are
        # picked up on case-sensitive filesystems as well.
        if os.path.splitext(name)[1].lower() not in image_suffixes:
            continue
        path = os.path.join(images_dir, name)
        if os.path.isfile(path):
            found.append(path)
    # Sorting gives a stable starting order, so a seeded shuffle is repeatable.
    found.sort()
    return found


def split_dataset(source_images=None, source_labels=None, dest_dir=None,
                  train_ratio=None, seed=None, show_progress=True):
    """Copy an images/labels dataset into ``train`` and ``val`` sub-folders.

    Images (``.jpg``, ``.jpeg``, ``.png``, any letter case) are shuffled and
    the first ``train_ratio`` share is copied to ``<dest_dir>/train/images``,
    the rest to ``<dest_dir>/val/images``. For every image, a label file with
    the same stem and a ``.txt`` suffix is copied to the matching ``labels``
    folder when it exists.

    Args:
        source_images: Folder containing the images. Defaults to SOURCE_IMAGES.
        source_labels: Folder containing the ``.txt`` labels. Defaults to
            SOURCE_LABELS.
        dest_dir: Output folder. Defaults to DEST_DIR.
        train_ratio: Fraction (0..1) of images assigned to ``train``.
            Defaults to TRAIN_RATIO.
        seed: Optional seed for a reproducible shuffle. ``None`` keeps the
            original behaviour of shuffling with the global ``random`` state.
        show_progress: When true, wrap the copy loop in a ``tqdm`` bar.

    Returns:
        A dict with the keys ``"train"``, ``"val"`` and ``"missing_labels"``
        holding the number of images per split and the number of images that
        had no label file.

    Raises:
        ValueError: If ``train_ratio`` is outside the range 0..1.
    """
    # Module constants are looked up only when the caller did not pass a value.
    if source_images is None:
        source_images = SOURCE_IMAGES
    if source_labels is None:
        source_labels = SOURCE_LABELS
    if dest_dir is None:
        dest_dir = DEST_DIR
    if train_ratio is None:
        train_ratio = TRAIN_RATIO
    if not 0.0 <= train_ratio <= 1.0:
        raise ValueError(f"train_ratio must be between 0 and 1, got {train_ratio!r}")

    if os.path.exists(dest_dir):
        print(f"⚠️ Warning: Destination folder already exists: {dest_dir}")
        print(" (Ideally, delete it before running this to avoid mixing old data!)")

    for split in ("train", "val"):
        for kind in ("images", "labels"):
            os.makedirs(os.path.join(dest_dir, split, kind), exist_ok=True)

    print(f"🔍 Scanning images in: {source_images}")
    all_images = _list_images(source_images)
    total_count = len(all_images)
    if total_count == 0:
        print(" Error: No images found!")
        return {"train": 0, "val": 0, "missing_labels": 0}

    if seed is None:
        random.shuffle(all_images)
    else:
        # A private generator keeps the seeded run from disturbing (or being
        # disturbed by) the global random state.
        random.Random(seed).shuffle(all_images)

    train_count = int(total_count * train_ratio)
    print(f"Found {total_count} unique images.")
    print(f" -> Training: {train_count}")
    print(f" -> Validation: {total_count - train_count}")

    print("Organizing files...")
    iterator = tqdm(all_images) if show_progress else all_images
    missing_labels = 0
    for index, img_path in enumerate(iterator):
        split = "train" if index < train_count else "val"
        filename = os.path.basename(img_path)
        label_name = os.path.splitext(filename)[0] + ".txt"

        shutil.copy(img_path, os.path.join(dest_dir, split, "images", filename))

        src_label = os.path.join(source_labels, label_name)
        if os.path.exists(src_label):
            shutil.copy(src_label, os.path.join(dest_dir, split, "labels", label_name))
        else:
            # The image is still copied; only the absence of a label is counted.
            missing_labels += 1

    if missing_labels:
        print(f"Note: {missing_labels} image(s) had no matching label file.")
    print("\nDone! Your dataset is ready at:")
    print(f" {dest_dir}")
    return {
        "train": train_count,
        "val": total_count - train_count,
        "missing_labels": missing_labels,
    }
if __name__ == "__main__":
    # Run the split only when the file is executed as a script, not on import.
    split_dataset()