Spaces:
Running
Running
| """ | |
| scripts/prepare_ifakefakedb.py | |
| Prepares the iFakeFaceDB dataset for fingerprint engine training. | |
| Kaggle slug: tapakah68/artificial-faces-dataset or similar. | |
| iFakeFaceDB contains ~87k StyleGAN-generated fake faces, useful for | |
| increasing unknown_gan class coverage. | |
| Kaggle usage: | |
| !python scripts/prepare_ifakefakedb.py \ | |
| --source /kaggle/input/artificial-faces-dataset \ | |
| --output /kaggle/working/processed/fingerprint \ | |
| --max 20000 | |
| """ | |
| from __future__ import annotations | |
| import argparse | |
| import logging | |
| import random | |
| import shutil | |
| from pathlib import Path | |
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
log = logging.getLogger(__name__)

# File suffixes (lower-case, including the dot) that count as images.
IMG_EXTS = {".jpg", ".jpeg", ".png"}

# Share of the selected images written to the train split; the rest go to val.
TRAIN_FRACTION = 0.85


def _collect_images(source: Path) -> list[Path]:
    """Return every image file below *source*, sorted by path.

    Sorting matters: ``rglob`` yields entries in filesystem order, so without
    it the seeded shuffle in ``main`` would not be reproducible across
    machines. The ``is_file`` check keeps out directories whose name happens
    to end in an image suffix.
    """
    return sorted(
        p
        for p in source.rglob("*")
        if p.suffix.lower() in IMG_EXTS and p.is_file()
    )


def _destination_name(img: Path, source: Path, taken: set[str]) -> str:
    """Return a file name for *img* that is not yet in *taken*.

    The first image with a given basename keeps the plain ``ifake_<name>``
    form. Later images with the same basename (from another sub-folder of
    *source*) get their relative folder path folded into the name, with a
    numeric counter as a last resort.
    """
    name = f"ifake_{img.name}"
    if name not in taken:
        return name
    tag = "_".join(img.parent.relative_to(source).parts)
    candidate = f"ifake_{tag}_{img.name}"
    counter = 1
    while candidate in taken:
        candidate = f"ifake_{tag}_{counter}_{img.name}"
        counter += 1
    return candidate


def main(args: argparse.Namespace) -> None:
    """Copy a seeded random sample of images into train/val ``fake`` folders.

    Reads ``args.source``, ``args.output``, ``args.max`` and ``args.seed``.
    Images found recursively under ``args.source`` are shuffled with
    ``args.seed``, truncated to ``args.max`` entries, split 85/15 and copied
    to ``<output>/train/fake`` and ``<output>/val/fake``. Files already
    present at the destination are left untouched, so re-running with the
    same arguments is a no-op.

    A missing source directory or a negative ``args.max`` is logged as an
    error and the function returns without copying anything.
    """
    source = Path(args.source)
    if not source.exists():
        log.error(f"Source not found: {source}")
        return
    # A negative limit would make the slice below drop images from the tail.
    if args.max is not None and args.max < 0:
        log.error(f"--max must be non-negative, got {args.max}")
        return

    rng = random.Random(args.seed)
    imgs = _collect_images(source)
    if not imgs:
        log.warning(f"No image files found under {source}")
    rng.shuffle(imgs)
    imgs = imgs[:args.max]

    n_train = int(len(imgs) * TRAIN_FRACTION)
    splits = {"train": imgs[:n_train], "val": imgs[n_train:]}
    for split, subset in splits.items():
        dst_dir = Path(args.output) / split / "fake"
        dst_dir.mkdir(parents=True, exist_ok=True)
        # Names handed out in this run for this split; used to tell a real
        # basename collision apart from a file left by an earlier run.
        taken: set[str] = set()
        for img in subset:
            name = _destination_name(img, source, taken)
            taken.add(name)
            dst = dst_dir / name
            if not dst.exists():
                shutil.copy2(img, dst)
        log.info(f" {split}/fake: {len(subset)} images (generator: unknown_gan / StyleGAN)")
    log.info("iFakeFaceDB preparation complete.")
| def parse_args(): | |
| p = argparse.ArgumentParser() | |
| p.add_argument("--source", default="/kaggle/input/artificial-faces-dataset") | |
| p.add_argument("--output", default="/kaggle/working/processed/fingerprint") | |
| p.add_argument("--max", type=int, default=20000) | |
| p.add_argument("--seed", type=int, default=42) | |
| return p.parse_args() | |
# Script entry point: parse the command line, then run the preparation.
if __name__ == "__main__":
    cli_args = parse_args()
    main(cli_args)