Spaces:

ethonmax
/

picpocket

Running

App Files Files Community

chenchaoyun committed on Nov 12, 2025

Commit

586a20d

1 Parent(s): 70d9a8b

fix

Browse files

Files changed (1) hide show

test/remove_faceless_images.py +169 -0

test/remove_faceless_images.py ADDED Viewed

	@@ -0,0 +1,169 @@

+#!/usr/bin/env python3
+"""
+遍历 /opt/data/chinese_celeb_dataset 下的图片，使用 YOLO 人脸检测并删除没有检测到人脸的图片。
+用法示例：
+    python test/remove_faceless_images.py --dry-run
+"""
+from __future__ import annotations
+import argparse
+import sys
+from pathlib import Path
+from typing import Iterable, List, Optional
+import config
+try:
+    from ultralytics import YOLO
+except ImportError as exc:  # pragma: no cover - 运行期缺依赖提示
+    raise SystemExit("缺少 ultralytics，请先执行 pip install ultralytics") from exc
+# Default dataset and model configuration.
+# DEFAULT_DATASET_DIR is the default value of the --dataset-dir option;
+# the model directory and the YOLO model name are read from the project's
+# `config` module.
+DEFAULT_DATASET_DIR = Path("/opt/data/chinese_celeb_dataset")
+MODEL_DIR = Path(config.MODELS_PATH)
+YOLO_MODEL_NAME = config.YOLO_MODEL
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the faceless-image cleanup script.

    Returns:
        The ``argparse`` namespace with the attributes ``dataset_dir``
        (``Path``), ``extensions`` (comma-separated string), ``confidence``
        (``float``), ``dry_run`` (``bool``) and ``verbose`` (``bool``).
    """
    cli = argparse.ArgumentParser(
        description="使用 YOLO 检测 /opt/data/chinese_celeb_dataset 中的图片并删除无脸图片"
    )
    # One (flag, keyword arguments) entry per option, registered in this order.
    option_table = (
        (
            "--dataset-dir",
            dict(
                type=Path,
                default=DEFAULT_DATASET_DIR,
                help="需要检查的根目录（默认：/opt/data/chinese_celeb_dataset）",
            ),
        ),
        (
            "--extensions",
            dict(
                type=str,
                default=".jpg,.jpeg,.png,.webp,.bmp",
                help="需要检查的图片扩展名，逗号分隔",
            ),
        ),
        (
            "--confidence",
            dict(
                type=float,
                default=config.FACE_CONFIDENCE,
                help="YOLO 检测的人脸置信度阈值",
            ),
        ),
        (
            "--dry-run",
            dict(
                action="store_true",
                help="仅输出将被删除的文件，不真正删除，便于先预览结果",
            ),
        ),
        (
            "--verbose",
            dict(
                action="store_true",
                help="输出更多调试信息",
            ),
        ),
    )
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)
    return cli.parse_args()
def load_yolo_model() -> YOLO:
    """Load the configured YOLO model, preferring a local weight file.

    ``MODEL_DIR / YOLO_MODEL_NAME`` is tried first when that path exists. The
    bare model name is always tried afterwards as a fallback (per the original
    author's note, this triggers an automatic download).

    Returns:
        The first model that loads successfully.

    Raises:
        RuntimeError: if no candidate could be loaded; chained to the last
            loading error.
    """
    weight_file = MODEL_DIR / YOLO_MODEL_NAME
    sources: List[str] = [str(weight_file)] if weight_file.exists() else []
    sources.append(YOLO_MODEL_NAME)

    failure: Optional[Exception] = None
    for source in sources:
        try:
            config.logger.info("尝试加载 YOLO 模型：%s", source)
            return YOLO(source)
        except Exception as err:  # pragma: no cover
            # Remember the failure and move on to the next candidate.
            failure = err
            config.logger.warning("加载 YOLO 模型失败：%s -> %s", source, err)

    raise RuntimeError(f"无法加载 YOLO 模型：{YOLO_MODEL_NAME}") from failure
def iter_image_files(root: Path, extensions: Iterable[str]) -> Iterable[Path]:
    """Yield every regular file under *root* whose suffix is in *extensions*.

    Args:
        root: Directory that is searched recursively.
        extensions: Extensions to accept. Matching is case-insensitive,
            surrounding whitespace is ignored, blank entries are dropped and
            the leading dot is optional (``"jpg"`` and ``".jpg"`` are
            equivalent).

    Yields:
        Paths of the matching files, in the order ``Path.rglob`` produces them.
    """
    wanted = set()
    for raw in extensions:
        ext = raw.strip().lower()
        if not ext:
            continue
        # Path.suffix always includes the leading dot, so an entry such as
        # "jpg" has to become ".jpg" or it could never match any file.
        wanted.add(ext if ext.startswith(".") else f".{ext}")

    if not wanted:
        # Nothing can match; avoid walking the whole tree for no result.
        return

    for path in root.rglob("*"):
        # Compare the suffix first: it is a pure string operation, whereas
        # is_file() has to query the file system.
        if path.suffix.lower() in wanted and path.is_file():
            yield path
def has_face(model: YOLO, image_path: Path, confidence: float, verbose: bool = False) -> bool:
    """Return whether the model reports at least one detection box for the image.

    A single box in any result is enough for the image to count as containing
    a face.
    NOTE(review): boxes of every class are accepted, so this presumably relies
    on the configured model being a face detector -- confirm.

    Inference errors are deliberately not swallowed here. The caller deletes
    every image for which this function returns ``False``, so answering
    ``False`` for an image that merely failed to be processed would delete a
    file that was never actually checked. Propagating the error lets the
    caller skip the file instead.

    Args:
        model: Callable detector, invoked as
            ``model(image_path, conf=confidence, verbose=False)``.
        image_path: Image to inspect.
        confidence: Confidence threshold forwarded to the model as ``conf``.
        verbose: When true, log the class id and score of every box found.

    Returns:
        ``True`` if any result holds at least one box, ``False`` otherwise.

    Raises:
        Exception: whatever the model call raises for this image.
    """
    results = model(image_path, conf=confidence, verbose=False)
    for result in results:
        boxes = getattr(result, "boxes", None)
        if boxes is None or len(boxes) == 0:
            continue
        if verbose:
            faces = [
                {
                    "cls": int(box.cls[0]) if getattr(box, "cls", None) is not None else -1,
                    "conf": float(box.conf[0]) if getattr(box, "conf", None) is not None else 0.0,
                }
                for box in boxes
            ]
            config.logger.info("检测到人脸：%s -> %s", image_path, faces)
        return True
    return False
def main() -> None:
    """Scan the dataset directory and delete images in which no face is detected.

    With ``--dry-run`` the files are only listed, not deleted. Images whose
    detection or deletion raises are logged, counted as errors and left in
    place. A one-line summary is printed at the end.

    Raises:
        SystemExit: if the dataset path is not an existing directory.
    """
    args = parse_args()
    dataset_dir: Path = args.dataset_dir.expanduser().resolve()
    # is_dir() rather than exists(): a regular file would otherwise pass the
    # check and later be reported, misleadingly, as "no images matched".
    if not dataset_dir.is_dir():
        raise SystemExit(f"目录不存在：{dataset_dir}")

    # Enumerate the images before loading the model, so that an empty
    # directory does not pay for a model load it has no use for.
    image_paths = list(iter_image_files(dataset_dir, args.extensions.split(",")))
    total = len(image_paths)
    if total == 0:
        print(f"目录 {dataset_dir} 下没有匹配到图片文件")
        return

    model = load_yolo_model()
    removed = 0
    errored = 0
    for idx, image_path in enumerate(image_paths, start=1):
        # Progress line: every image in verbose mode, otherwise every 100th.
        if idx % 100 == 0 or args.verbose:
            print(f"[{idx}/{total}] 正在处理 {image_path}")
        try:
            if has_face(model, image_path, args.confidence, args.verbose):
                continue
        except Exception as exc:  # pragma: no cover
            # A failed detection says nothing about the image: keep the file.
            errored += 1
            config.logger.error("检测过程中发生异常，跳过 %s：%s", image_path, exc)
            continue

        if args.dry_run:
            print(f"[DRY-RUN] 将删除：{image_path}")
        else:
            try:
                image_path.unlink()
            except OSError as exc:  # pragma: no cover
                errored += 1
                config.logger.error("删除失败 %s：%s", image_path, exc)
                continue
            print(f"已删除：{image_path}")
        # In dry-run mode this counts the files that would have been deleted.
        removed += 1

    print(
        f"扫描完成，检测图片 {total} 张，删除 {removed} 张无脸图片，异常 {errored} 张，数据保存在：{dataset_dir}"
    )
if __name__ == "__main__":
    # Script entry point: turn Ctrl+C into an exit with a short message
    # instead of a traceback.
    try:
        main()
    except KeyboardInterrupt:  # pragma: no cover
        raise SystemExit("用户中断")