Spaces:
Running
Running
File size: 2,688 Bytes
81f9dfe | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 | """
VERIDEX — Dataset Split Script (Fixed v2)
==========================================
Takes the images in dataset/train/real and dataset/train/fake
and splits them into train (70%) / val (15%) / test (15%).
"""
import os, shutil, random
# ── CONFIG ──────────────────────────────────────────────────────
# Folders the images of each class are read from.
REAL_SRC = "dataset/train/real"
FAKE_SRC = "dataset/train/fake"

# Root folder under which train/, val/ and test/ are written.
DATASET_DIR = "dataset"

# Fractions assigned to train and val; whatever remains goes to test.
TRAIN_RATIO = 0.70
VAL_RATIO = 0.15

# Seed handed to random.seed() before the file lists are shuffled.
SEED = 42

# File extensions accepted as images (compared lower-cased, dot included).
SUPPORTED = {
    ".jpg",
    ".jpeg",
    ".png",
    ".webp",
    ".bmp",
    ".tiff",
}
# ── HELPERS ─────────────────────────────────────────────────────
def get_images(folder, extensions=None):
    """Return the sorted names of the image files directly inside ``folder``.

    Args:
        folder: Directory to scan (not recursive).
        extensions: Optional collection of lower-case extensions including
            the dot (e.g. ``{".jpg"}``). Defaults to the module-level
            ``SUPPORTED`` set.

    Returns:
        A list of bare file names (no directory part), sorted
        alphabetically. Hidden entries (leading ".") and anything that is
        not a regular file are left out.

    Raises:
        OSError: If ``folder`` cannot be listed.
    """
    allowed = SUPPORTED if extensions is None else extensions
    # os.listdir() returns entries in arbitrary order; sorting gives the
    # seeded shuffle in split() the same starting point on every machine.
    return sorted(
        name
        for name in os.listdir(folder)
        if not name.startswith(".")
        and os.path.splitext(name)[1].lower() in allowed
        # A directory called e.g. "x.jpg" would otherwise break the copy step.
        and os.path.isfile(os.path.join(folder, name))
    )
def copy_files(files, src_dir, dst_dir):
    """Copy the named files from ``src_dir`` into ``dst_dir``.

    ``dst_dir`` is created if it does not exist. When both arguments refer
    to the same directory nothing is copied.

    Args:
        files: Iterable of bare file names located in ``src_dir``.
        src_dir: Directory the files are read from.
        dst_dir: Directory the files are written to.

    Raises:
        OSError: If a file cannot be read or written (for example when a
            listed name is missing from ``src_dir``).
    """
    os.makedirs(dst_dir, exist_ok=True)
    # Resolve symlinks and platform case rules so that two different
    # spellings of one directory are recognised as the same place;
    # shutil.copy2 raises SameFileError when asked to copy a file onto itself.
    src_root = os.path.realpath(src_dir)
    dst_root = os.path.realpath(dst_dir)
    if os.path.normcase(src_root) == os.path.normcase(dst_root):
        return
    for name in files:
        shutil.copy2(os.path.join(src_root, name), os.path.join(dst_root, name))
# ── MAIN ────────────────────────────────────────────────────────
def _remove_files(files, folder):
    """Delete the named files from ``folder`` once they have been copied elsewhere."""
    for name in files:
        os.remove(os.path.join(folder, name))


def split():
    """Split the real/fake image folders into train, val and test sets.

    For each label the images found in its source folder are shuffled
    (seeded with ``SEED``) and divided by ``TRAIN_RATIO`` and ``VAL_RATIO``;
    the remainder becomes the test set. Files are copied to
    ``DATASET_DIR/<train|val|test>/<label>``.

    When a source folder *is* the train destination, the val and test
    images are removed from it after being copied, so that the three sets
    do not overlap. In that case a label whose val or test folder already
    holds images is skipped, because splitting again would shrink the
    train set further.

    Prints progress to stdout and returns ``None``. If any source folder is
    missing, an error is printed and nothing is copied.
    """
    sources = [("real", REAL_SRC), ("fake", FAKE_SRC)]

    # Check every source first so a missing folder cannot leave the
    # dataset half split.
    for _, src in sources:
        if not os.path.exists(src):
            print(f"ERROR: {src} folder not found!")
            return

    random.seed(SEED)
    for label, src in sources:
        train_dir = os.path.join(DATASET_DIR, "train", label)
        val_dir = os.path.join(DATASET_DIR, "val", label)
        test_dir = os.path.join(DATASET_DIR, "test", label)

        # True when the images already live in the train folder: val/test
        # files then have to leave it, otherwise train would keep 100% of
        # the data and overlap with the evaluation sets.
        in_place = (
            os.path.normcase(os.path.realpath(src))
            == os.path.normcase(os.path.realpath(train_dir))
        )
        if in_place and any(
            os.path.isdir(folder) and get_images(folder)
            for folder in (val_dir, test_dir)
        ):
            print(f"\n{label.upper()}: val/test already contain images, skipping (already split).")
            continue

        files = get_images(src)
        random.shuffle(files)
        total = len(files)
        n_train = int(total * TRAIN_RATIO)
        n_val = int(total * VAL_RATIO)
        train_files = files[:n_train]
        val_files = files[n_train:n_train + n_val]
        # Rounding leftovers end up in the test set.
        test_files = files[n_train + n_val:]

        print(f"\n{label.upper()} ({total:,} images):")
        print(f" train → {len(train_files):,} (copying...)")
        copy_files(train_files, src, train_dir)
        print(f" val → {len(val_files):,} (copying...)")
        copy_files(val_files, src, val_dir)
        print(f" test → {len(test_files):,} (copying...)")
        copy_files(test_files, src, test_dir)

        if in_place:
            # Only delete after every copy succeeded.
            _remove_files(val_files + test_files, src)
        print(f" ✅ {label} done!")

    print("\n🎉 Split complete!")
    print(" Now run: python train_gpu.py")
# Run the split only when this file is executed as a script, not on import.
if __name__ == "__main__":
    split()
|