Spaces:

shadow861453
/

VERIDEX.V1

Running

File size: 2,688 Bytes

81f9dfe

"""
VERIDEX — Dataset Split Script (Fixed v2)
==========================================
Takes the images in dataset/train/real and dataset/train/fake and
splits them into train (70%) / val (15%) / test (15%).
"""
import os, shutil, random

# ── CONFIG ──────────────────────────────────────────────────────
# Folders that split() reads the "real" and "fake" class images from.
REAL_SRC    = "dataset/train/real"
FAKE_SRC    = "dataset/train/fake"
# Root under which split() writes the <subset>/<label> folders
# (subset is train, val or test).
DATASET_DIR = "dataset"

# Fractions of each class assigned to train and val; whatever remains after
# both slices are taken becomes the test subset.
TRAIN_RATIO = 0.70
VAL_RATIO   = 0.15
# Value passed to random.seed() before the file lists are shuffled.
SEED        = 42
# Extensions (lower-case, leading dot) accepted by get_images(), which
# lower-cases each file's extension before looking it up here.
SUPPORTED   = {".jpg", ".jpeg", ".png", ".webp", ".bmp", ".tiff"}

# ── HELPERS ──────────────────────────────────────────────────────
def get_images(folder):
    """Return the image file names found directly inside ``folder``.

    A name qualifies when it is a regular file, is not hidden (no leading
    dot) and its extension, compared case-insensitively, is in ``SUPPORTED``.

    Args:
        folder: Directory to scan; sub-directories are not descended into.

    Returns:
        list[str]: Bare file names (no directory part) in sorted order.

    Raises:
        OSError: If ``folder`` cannot be listed (propagated from
            ``os.listdir``).
    """
    # os.listdir() yields entries in arbitrary, platform-dependent order.
    # Sorting gives callers a stable starting point, so a seeded shuffle of
    # the result produces the same split on every machine.
    return sorted(
        name
        for name in os.listdir(folder)
        if not name.startswith(".")
        and os.path.splitext(name)[1].lower() in SUPPORTED
        # A directory that merely looks like an image (e.g. "crops.png/")
        # cannot be copied as a file, so leave it out.
        and os.path.isfile(os.path.join(folder, name))
    )

def copy_files(files, src_dir, dst_dir):
    """Copy the named files from ``src_dir`` into ``dst_dir``.

    ``dst_dir`` is created (including parents) when it does not exist. If
    both arguments refer to the same directory the files are already in
    place and nothing is copied.

    Args:
        files: Iterable of bare file names located in ``src_dir``.
        src_dir: Directory to copy from.
        dst_dir: Directory to copy into.

    Raises:
        OSError: If a file cannot be read or written (propagated from
            ``shutil.copy2``).
    """
    os.makedirs(dst_dir, exist_ok=True)

    # Compare the directories themselves, once, instead of comparing path
    # strings per file: samefile() also recognises the same folder reached
    # through a symlink, a relative spelling or different letter case, where
    # copy2() would otherwise fail with SameFileError.
    if os.path.isdir(src_dir) and os.path.samefile(src_dir, dst_dir):
        return

    for name in files:
        # copy2 keeps timestamps and other metadata along with the content.
        shutil.copy2(os.path.join(src_dir, name), os.path.join(dst_dir, name))

# ── MAIN ─────────────────────────────────────────────────────────
def split():
    """Split the real and fake image folders into train/val/test subsets.

    For each class the image names are sorted, shuffled with ``SEED`` and cut
    into ``TRAIN_RATIO`` / ``VAL_RATIO`` / remainder slices, which are placed
    in ``DATASET_DIR/<subset>/<label>``.

    When a source folder is itself the train destination (the default
    configuration), the val and test images are moved out of it instead of
    being duplicated; otherwise every held-out image would also stay in the
    training set. A source located anywhere else is only copied from and is
    left untouched.

    Nothing is written when a source folder is missing, or when an in-place
    source already has images in its val/test folders (a sign the split was
    run before); an ``ERROR`` line is printed instead.

    Returns:
        None. Progress is reported on stdout.
    """
    random.seed(SEED)
    sources = [("real", REAL_SRC), ("fake", FAKE_SRC)]

    def subset_dir(subset, label):
        # Destination folder for one subset of one class.
        return os.path.join(DATASET_DIR, subset, label)

    def is_train_dir(path, label):
        # True when `path` is the very folder the train subset is written to.
        return os.path.realpath(path) == os.path.realpath(
            subset_dir("train", label)
        )

    # Validate every class before touching the disk so that a problem with
    # one of them cannot leave the dataset half split.
    for label, src in sources:
        if not os.path.isdir(src):
            print(f"ERROR: {src} folder not found!")
            return
        if not is_train_dir(src, label):
            continue
        for subset in ("val", "test"):
            held_out = subset_dir(subset, label)
            if os.path.isdir(held_out) and get_images(held_out):
                # Splitting an already-split train folder again would shrink
                # it further and mix old and new held-out images.
                print(f"ERROR: {held_out} already contains images; "
                      f"{src} looks already split.")
                print("       Restore the full image set or clear val/test "
                      "before re-running.")
                return

    for label, src in sources:
        in_place = is_train_dir(src, label)

        # Directory listing order is arbitrary; sort first so that SEED alone
        # determines the split.
        files = sorted(get_images(src))
        random.shuffle(files)
        total = len(files)

        n_train = int(total * TRAIN_RATIO)
        n_val = int(total * VAL_RATIO)
        parts = [
            ("train", files[:n_train]),
            ("val", files[n_train:n_train + n_val]),
            # The test subset absorbs the rounding remainder.
            ("test", files[n_train + n_val:]),
        ]

        print(f"\n{label.upper()} ({total:,} images):")
        for subset, names in parts:
            dst = subset_dir(subset, label)
            # Held-out images must leave an in-place source, or they would
            # remain part of the training data. Never delete when the
            # destination resolves to the source itself.
            move = (
                in_place
                and subset != "train"
                and os.path.realpath(dst) != os.path.realpath(src)
            )
            action = "moving" if move else "copying"
            print(f"  {subset:<5} → {len(names):,}  ({action}...)")
            copy_files(names, src, dst)
            if move:
                # copy_files raised if any copy failed, so every name below
                # now exists in dst and can be dropped from the source.
                for name in names:
                    os.remove(os.path.join(src, name))
        print(f"  ✅ {label} done!")

    print("\n🎉 Split complete!")
    print("   Now run: python train_gpu.py")

# Run the split only when this file is executed as a script, not on import.
if __name__ == "__main__":
    split()