"""
VERIDEX — Dataset Split Script (Fixed v2)
==========================================
Splits the images found in dataset/train/real and dataset/train/fake into
train (70%) / val (15%) / test (15%) subsets under dataset/.
"""

import os
import random
import shutil

# ── CONFIG ──────────────────────────────────────────────────────
REAL_SRC = "dataset/train/real"
FAKE_SRC = "dataset/train/fake"
DATASET_DIR = "dataset"

TRAIN_RATIO = 0.70
VAL_RATIO = 0.15  # the test split receives whatever is left over
SEED = 42
SUPPORTED = {".jpg", ".jpeg", ".png", ".webp", ".bmp", ".tiff"}


# ── HELPERS ──────────────────────────────────────────────────────
def _same_dir(path_a, path_b):
    """Return True when *path_a* and *path_b* name the same directory."""
    return (
        os.path.normcase(os.path.realpath(path_a))
        == os.path.normcase(os.path.realpath(path_b))
    )


def get_images(folder):
    """Return the sorted file names of the supported images in *folder*.

    Hidden files (leading ".") and entries whose extension is not in
    SUPPORTED are ignored, as are directories that merely have an
    image-like name.

    The result is sorted because ``os.listdir`` returns entries in
    arbitrary order; without a stable starting order the seeded shuffle in
    ``split`` would not be reproducible across machines or file systems.
    """
    return sorted(
        name
        for name in os.listdir(folder)
        if not name.startswith(".")
        and os.path.splitext(name)[1].lower() in SUPPORTED
        and os.path.isfile(os.path.join(folder, name))
    )


def copy_files(files, src_dir, dst_dir, *, move=False):
    """Transfer the named *files* from *src_dir* into *dst_dir*.

    Args:
        files: File names (not paths) located directly in *src_dir*.
        src_dir: Directory the files currently live in.
        dst_dir: Target directory; created if it does not exist.
        move: When True the files are moved instead of copied, so they no
            longer exist in *src_dir* afterwards. Defaults to copying
            (with metadata, via ``shutil.copy2``).

    If *src_dir* and *dst_dir* are the same directory nothing is done,
    because the files are already in place.
    """
    os.makedirs(dst_dir, exist_ok=True)
    if _same_dir(src_dir, dst_dir):
        return

    src_abs = os.path.abspath(src_dir)
    dst_abs = os.path.abspath(dst_dir)
    transfer = shutil.move if move else shutil.copy2
    for name in files:
        transfer(os.path.join(src_abs, name), os.path.join(dst_abs, name))


def _split_label(label, src):
    """Split the images of one class (*label*) stored in *src*."""
    train_dir = os.path.join(DATASET_DIR, "train", label)
    val_dir = os.path.join(DATASET_DIR, "val", label)
    test_dir = os.path.join(DATASET_DIR, "test", label)

    # When the source already IS the train folder, the val/test images must
    # leave it; copying would keep them in train as well and leak the
    # evaluation data into training.
    in_place = _same_dir(src, train_dir)

    # Moving is destructive, so never re-split a train folder whose val/test
    # counterparts are already populated (e.g. when the script is run twice).
    if in_place and any(
        os.path.isdir(d) and get_images(d) for d in (val_dir, test_dir)
    ):
        print(f"\n{label.upper()}: val/test already contain images - skipped.")
        print(" Clear them (or restore the train folder) to split again.")
        return

    files = get_images(src)
    # A dedicated generator keeps each label's split independent of the
    # other label and of any other user of the global random state.
    random.Random(SEED).shuffle(files)

    total = len(files)
    n_train = int(total * TRAIN_RATIO)
    n_val = int(total * VAL_RATIO)

    train_files = files[:n_train]
    val_files = files[n_train:n_train + n_val]
    test_files = files[n_train + n_val:]  # remainder, absorbs rounding

    print(f"\n{label.upper()} ({total:,} images):")
    if in_place:
        print(" source is the train folder: val/test files are moved out of it")

    print(f" train → {len(train_files):,} (copying...)")
    copy_files(train_files, src, train_dir)

    print(f" val → {len(val_files):,} (copying...)")
    copy_files(val_files, src, val_dir, move=in_place)

    print(f" test → {len(test_files):,} (copying...)")
    copy_files(test_files, src, test_dir, move=in_place)

    print(f" ✅ {label} done!")


# ── MAIN ─────────────────────────────────────────────────────────
def split():
    """Split the real and fake image folders into train/val/test sets.

    Each label is shuffled with SEED and divided by TRAIN_RATIO and
    VAL_RATIO; the remaining images form the test set. Results are written
    to DATASET_DIR/<train|val|test>/<label>.

    Prints an error and returns without touching any file if one of the
    source folders is missing.
    """
    sources = [("real", REAL_SRC), ("fake", FAKE_SRC)]

    # Validate everything first so a missing folder cannot leave the
    # dataset half split.
    for _, src in sources:
        if not os.path.isdir(src):
            print(f"ERROR: {src} folder not found!")
            return

    for label, src in sources:
        _split_label(label, src)

    print("\n🎉 Split complete!")
    print(" Now run: python train_gpu.py")


if __name__ == "__main__":
    split()