Spaces:
Sleeping
Sleeping
| import os | |
| import shutil | |
| import random | |
| from glob import glob | |
| from tqdm import tqdm | |
# Share of the shuffled images that split_dataset() assigns to the training
# split; the remaining images go to the validation split.
TRAIN_RATIO = 0.8

# Root of the input dataset and the two subfolders read by split_dataset().
SOURCE_ROOT = r"C:\Users\charu\Documents\goyam\roboflow\train"
SOURCE_IMAGES, SOURCE_LABELS = (
    os.path.join(SOURCE_ROOT, subdir) for subdir in ("images", "labels")
)

# Root of the output; split_dataset() creates <split>/images and
# <split>/labels underneath it for the "train" and "val" splits.
DEST_DIR = r"C:\Users\charu\Documents\goyam\roboflow\final_split"
def split_dataset(seed=None):
    """Copy the source images and their labels into train/val folders.

    Every ``.jpg``/``.jpeg``/``.png`` file (extension compared
    case-insensitively) directly inside ``SOURCE_IMAGES`` is collected,
    shuffled, and split: the first ``int(total * TRAIN_RATIO)`` images go to
    ``DEST_DIR/train/images`` and the rest to ``DEST_DIR/val/images``. For
    each image, a label file with the same stem and a ``.txt`` extension is
    copied from ``SOURCE_LABELS`` into the matching ``labels`` folder when it
    exists. Images without a label are still copied and are counted in the
    final report.

    Args:
        seed: Optional seed for a reproducible split. With the default
            ``None`` the module-level ``random`` state is used.

    Returns:
        None. Progress and results are printed to stdout; if no image is
        found, an error line is printed and nothing is created or copied.
    """
    if os.path.exists(DEST_DIR):
        print(f"⚠️ Warning: Destination folder already exists: {DEST_DIR}")
        print(" (Ideally, delete it before running this to avoid mixing old data!)")

    print(f"🔍 Scanning images in: {SOURCE_IMAGES}")
    image_exts = {".jpg", ".jpeg", ".png"}
    # One glob pass with a lower-cased suffix test also catches mixed-case
    # extensions (e.g. ".Jpg"); the isfile() test keeps directories whose
    # name happens to end in an image suffix away from shutil.copy().
    # Sorting gives the shuffle a stable starting order, so a given seed
    # always yields the same split.
    all_images = sorted(
        path
        for path in glob(os.path.join(SOURCE_IMAGES, "*"))
        if os.path.splitext(path)[1].lower() in image_exts
        and os.path.isfile(path)
    )

    # Bail out before touching the destination so a failed run does not
    # leave empty split folders behind.
    if not all_images:
        print(" Error: No images found!")
        return

    if seed is None:
        # Keep using the global RNG so callers that seeded ``random``
        # themselves still influence the shuffle.
        random.shuffle(all_images)
    else:
        random.Random(seed).shuffle(all_images)

    total_count = len(all_images)
    train_count = int(total_count * TRAIN_RATIO)
    print(f"Found {total_count} unique images.")
    print(f" -> Training: {train_count}")
    print(f" -> Validation: {total_count - train_count}")

    for split in ("train", "val"):
        for kind in ("images", "labels"):
            os.makedirs(os.path.join(DEST_DIR, split, kind), exist_ok=True)

    # Copy each image, plus its label when one exists, into its split.
    print("Organizing files...")
    missing_labels = 0
    for i, img_path in enumerate(tqdm(all_images)):
        split = "train" if i < train_count else "val"
        filename = os.path.basename(img_path)
        label_name = os.path.splitext(filename)[0] + ".txt"

        shutil.copy(img_path, os.path.join(DEST_DIR, split, "images", filename))

        src_txt_path = os.path.join(SOURCE_LABELS, label_name)
        if os.path.isfile(src_txt_path):
            shutil.copy(
                src_txt_path,
                os.path.join(DEST_DIR, split, "labels", label_name),
            )
        else:
            missing_labels += 1

    if missing_labels:
        print(f"Warning: {missing_labels} image(s) had no matching .txt label.")
    print("\nDone! Your dataset is ready at:")
    print(f" {DEST_DIR}")
# Run the split only when this file is executed as a script, not on import.
if __name__ == "__main__":
    split_dataset()