# React_native_app / split.py
# Charuka66 - "Upload split.py" - commit 77fe907 (verified)
import os
import shutil
import random
from glob import glob
from tqdm import tqdm
# --- Configuration -----------------------------------------------------------
# Folder holding the un-split dataset, and the folder the split is written to.
SOURCE_ROOT = r"C:\Users\charu\Documents\goyam\roboflow\train"
DEST_DIR = r"C:\Users\charu\Documents\goyam\roboflow\final_split"

# Fraction of the images that goes to the 'train' split; the rest go to 'val'.
TRAIN_RATIO = 0.8

# Input sub-folders derived from SOURCE_ROOT: images and their .txt labels.
SOURCE_IMAGES, SOURCE_LABELS = (
    os.path.join(SOURCE_ROOT, subfolder) for subfolder in ("images", "labels")
)
# Lower-cased file-name suffixes that count as images.
_IMAGE_SUFFIXES = ('.jpg', '.jpeg', '.png')


def _find_images(images_dir):
    """Return the sorted paths of the image files directly inside *images_dir*.

    Matching is case-insensitive on the extension, so ``.Jpg`` is found even
    on case-sensitive filesystems. Dot-files are skipped, as a ``*.jpg`` glob
    would skip them. A missing directory yields an empty list.
    """
    if not os.path.isdir(images_dir):
        return []
    with os.scandir(images_dir) as entries:
        return sorted(
            entry.path
            for entry in entries
            if entry.is_file()
            and not entry.name.startswith('.')
            and entry.name.lower().endswith(_IMAGE_SUFFIXES)
        )


def split_dataset(source_images=None, source_labels=None, dest_dir=None,
                  train_ratio=None, seed=None):
    """Randomly split an images/labels dataset into train and val folders.

    Images are shuffled, the first ``train_ratio`` share is copied to
    ``<dest_dir>/train/images`` and the rest to ``<dest_dir>/val/images``.
    For each image, a label file with the same stem and a ``.txt`` extension
    is copied to the matching ``labels`` folder when it exists; images
    without a label file are still copied.

    Args:
        source_images: Folder containing the images. Defaults to
            ``SOURCE_IMAGES``.
        source_labels: Folder containing the ``.txt`` label files. Defaults
            to ``SOURCE_LABELS``.
        dest_dir: Output folder. Defaults to ``DEST_DIR``.
        train_ratio: Fraction (0..1) of images assigned to the train split.
            Defaults to ``TRAIN_RATIO``.
        seed: Optional seed for a reproducible shuffle. When ``None`` the
            module-level ``random`` state is used.

    Raises:
        ValueError: If ``train_ratio`` is outside the range 0..1.
    """
    source_images = SOURCE_IMAGES if source_images is None else source_images
    source_labels = SOURCE_LABELS if source_labels is None else source_labels
    dest_dir = DEST_DIR if dest_dir is None else dest_dir
    train_ratio = TRAIN_RATIO if train_ratio is None else train_ratio

    if not 0.0 <= train_ratio <= 1.0:
        raise ValueError(f"train_ratio must be between 0 and 1, got {train_ratio!r}")

    if os.path.exists(dest_dir):
        print(f"⚠️ Warning: Destination folder already exists: {dest_dir}")
        print(" (Ideally, delete it before running this to avoid mixing old data!)")

    print(f"🔍 Scanning images in: {source_images}")
    all_images = _find_images(source_images)
    total_count = len(all_images)

    # Bail out before touching the destination so an empty or missing source
    # folder does not leave empty output directories behind.
    if total_count == 0:
        print(" Error: No images found!")
        return

    # The list is sorted, so the shuffle result depends only on the RNG state
    # and a seeded run is repeatable.
    if seed is None:
        random.shuffle(all_images)
    else:
        random.Random(seed).shuffle(all_images)

    train_count = int(total_count * train_ratio)

    print(f"Found {total_count} unique images.")
    print(f" -> Training: {train_count}")
    print(f" -> Validation: {total_count - train_count}")

    for split in ('train', 'val'):
        os.makedirs(os.path.join(dest_dir, split, 'images'), exist_ok=True)
        os.makedirs(os.path.join(dest_dir, split, 'labels'), exist_ok=True)

    print("Organizing files...")
    missing_labels = 0
    for i, img_path in enumerate(tqdm(all_images)):
        split = 'train' if i < train_count else 'val'
        filename = os.path.basename(img_path)
        label_name = os.path.splitext(filename)[0] + ".txt"

        shutil.copy(img_path, os.path.join(dest_dir, split, 'images', filename))

        # Labels are optional: an image without a .txt file is copied alone.
        src_txt_path = os.path.join(source_labels, label_name)
        if os.path.isfile(src_txt_path):
            shutil.copy(src_txt_path,
                        os.path.join(dest_dir, split, 'labels', label_name))
        else:
            missing_labels += 1

    if missing_labels:
        print(f"Note: {missing_labels} image(s) had no label file and were copied without one.")

    print("\nDone! Your dataset is ready at:")
    print(f" {dest_dir}")
# Run the split only when this file is executed as a script, not on import.
if __name__ == "__main__":
    split_dataset()