Spaces:
Running
Running
"""
VERIDEX - Dataset Split Script (Fixed v2)
==========================================
Splits the images found in dataset/train/real and dataset/train/fake
into train (70%) / val (15%) / test (15%) sets.
"""
import os
import random
import shutil
# -- CONFIG ------------------------------------------------------------
# Source folders that split() reads the "real" and "fake" images from.
REAL_SRC = "dataset/train/real"
FAKE_SRC = "dataset/train/fake"

# Root under which split() writes the train/<label>, val/<label> and
# test/<label> folders.
DATASET_DIR = "dataset"

# Fractions of each class assigned to train and val; whatever is left after
# both slices goes to test.
TRAIN_RATIO = 0.70
VAL_RATIO = 0.15

# Seed for the shuffle that precedes the split.
SEED = 42

# Extensions accepted by get_images(); compared against the lower-cased
# extension of each file name, so entries must be lower-case with the dot.
SUPPORTED = {".jpg", ".jpeg", ".png", ".webp", ".bmp", ".tiff"}
# -- HELPERS -----------------------------------------------------------
def get_images(folder, extensions=None):
    """Return the sorted names of the image files directly inside *folder*.

    A name is kept when it does not start with "." (hidden files), its
    lower-cased extension is in *extensions*, and it refers to a regular
    file (a directory that merely has an image-like name is ignored).

    Args:
        folder: Directory to scan; sub-directories are not descended into.
        extensions: Collection of lower-case extensions including the
            leading dot, e.g. {".jpg", ".png"}. When omitted, the
            module-level SUPPORTED set is used.

    Returns:
        A list of bare file names (no directory part), sorted so that the
        result does not depend on the arbitrary order of os.listdir().

    Raises:
        OSError: if *folder* cannot be listed.
    """
    allowed = SUPPORTED if extensions is None else extensions
    return sorted(
        name
        for name in os.listdir(folder)
        if not name.startswith(".")
        and os.path.splitext(name)[1].lower() in allowed
        and os.path.isfile(os.path.join(folder, name))
    )
def copy_files(files, src_dir, dst_dir):
    """Copy the named files from *src_dir* into *dst_dir*.

    *dst_dir* is created if it does not exist. When source and destination
    are the same directory nothing is copied, because every file is already
    where it should be (and shutil would refuse to copy a file onto itself).

    Args:
        files: Iterable of file names relative to *src_dir*.
        src_dir: Directory the files are read from.
        dst_dir: Directory the files are written to.

    Returns:
        The number of files actually copied (0 when the two directories
        are the same).

    Raises:
        OSError: if a file cannot be read or written.
    """
    os.makedirs(dst_dir, exist_ok=True)
    # normcase makes the comparison case-insensitive on Windows, where
    # "Dataset" and "dataset" are the same directory.
    src_abs = os.path.normcase(os.path.abspath(src_dir))
    dst_abs = os.path.normcase(os.path.abspath(dst_dir))
    if src_abs == dst_abs:
        return 0

    copied = 0
    for name in files:
        # copy2 also preserves file metadata such as modification time.
        shutil.copy2(os.path.join(src_abs, name), os.path.join(dst_abs, name))
        copied += 1
    return copied
# -- MAIN --------------------------------------------------------------
def _same_dir(a, b):
    """Return True when *a* and *b* resolve to the same directory path."""
    return (os.path.normcase(os.path.abspath(a))
            == os.path.normcase(os.path.abspath(b)))


def _move_files(files, src_dir, dst_dir):
    """Move each name in *files* from *src_dir* into *dst_dir*."""
    os.makedirs(dst_dir, exist_ok=True)
    for name in files:
        shutil.move(os.path.join(src_dir, name), os.path.join(dst_dir, name))


def split():
    """Split the real/fake image folders into train / val / test sets.

    For each label the images in its source folder are listed, sorted,
    shuffled with a generator seeded by SEED, and sliced into
    TRAIN_RATIO / VAL_RATIO / remainder portions, which end up in
    DATASET_DIR/<train|val|test>/<label>.

    When the source folder *is* the train destination (the configured
    layout), the val and test images are moved out of it rather than
    copied; copying would leave every held-out image inside the train
    folder and leak it into training. With any other source folder the
    files are copied and the source is left untouched.

    Progress is printed to stdout. Nothing is returned. If a source folder
    is missing, an error is printed and no files are touched.
    """
    sources = [("real", REAL_SRC), ("fake", FAKE_SRC)]

    # Validate every source before doing any work so that a missing folder
    # cannot leave the dataset half split.
    for _, src in sources:
        if not os.path.isdir(src):
            print(f"ERROR: {src} folder not found!")
            return

    # A private generator gives the same permutation as seeding the global
    # one, without disturbing other users of the random module.
    rng = random.Random(SEED)

    for label, src in sources:
        train_dst = os.path.join(DATASET_DIR, "train", label)
        val_dst = os.path.join(DATASET_DIR, "val", label)
        test_dst = os.path.join(DATASET_DIR, "test", label)
        in_place = _same_dir(src, train_dst)

        # An in-place split removes files from the source, so running it a
        # second time would shrink the train set again. Refuse to do that.
        if in_place and any(
            os.path.isdir(d) and get_images(d) for d in (val_dst, test_dst)
        ):
            print(f"\n{label.upper()}: SKIPPED - {val_dst} or {test_dst} "
                  f"already contains images, so {src} looks already split.")
            continue

        # os.listdir() order is arbitrary; sorting first makes the seeded
        # shuffle reproducible across machines and runs.
        files = sorted(get_images(src))
        rng.shuffle(files)

        total = len(files)
        n_train = int(total * TRAIN_RATIO)
        n_val = int(total * VAL_RATIO)
        train_files = files[:n_train]
        val_files = files[n_train:n_train + n_val]
        test_files = files[n_train + n_val:]  # remainder, absorbs rounding

        transfer = _move_files if in_place else copy_files
        action = "moving" if in_place else "copying"
        train_note = "already in place" if in_place else "copying..."

        print(f"\n{label.upper()} ({total:,} images):")
        print(f"  train -> {len(train_files):,} ({train_note})")
        copy_files(train_files, src, train_dst)  # no-op when in place
        print(f"  val   -> {len(val_files):,} ({action}...)")
        transfer(val_files, src, val_dst)
        print(f"  test  -> {len(test_files):,} ({action}...)")
        transfer(test_files, src, test_dst)
        print(f"  {label} done!")

    print("\nSplit complete!")
    print("  Now run: python train_gpu.py")
# Run the split only when executed as a script, not when imported.
if __name__ == "__main__":
    split()