hongw.qin committed
Commit d1faacc · 1 Parent(s): 567d35c

upload models
.gitignore ADDED
@@ -0,0 +1,6 @@
+ .vscode/
+ .venv/
+ *.pyc
+ __pycache__/
+ outputs/
+ datasets/
README.md CHANGED
@@ -1,3 +1,110 @@
  ---
  license: apache-2.0
+ tags:
+ - RyzenAI
+ - Int8 quantization
+ - Face Restoration
+ - PSFRGAN
+ - ONNX
+ - Computer Vision
+ metrics:
+ - PSNR
+ - MS_SSIM
+ - FID
  ---
+
+ # PSFRGAN for face restoration
+
+ The model operates at 512x512 resolution and is particularly effective at restoring faces with various degradations, including blur, noise, compression artifacts, and low resolution.
+
+ It was introduced in the paper _Progressive Semantic-Aware Style Transformation for Blind Face Restoration_ by Chaofeng Chen et al. at CVPR 2021.
+
+ We have developed a modified version optimized for [AMD Ryzen AI](https://onnxruntime.ai/docs/execution-providers/Vitis-AI-ExecutionProvider.html).
+
+ ## Model description
+
+ PSFRGAN (Progressive Semantic-aware Face Restoration Generative Adversarial Network) is a deep learning model designed for blind face restoration: it recovers high-quality face images from severely degraded inputs.
+
+ ## Intended uses & limitations
+
+ You can use this model for face restoration tasks. See the [model hub](https://huggingface.co/models?search=amd/ryzenai-psfrgan) for all available PSFRGAN models.
+
+ ## How to use
+
+ ### Installation
+
+ ```bash
+ # inference only
+ pip install -r requirements-infer.txt
+ # inference & evaluation
+ pip install -r requirements-eval.txt
+ ```
+
+ ### Data Preparation (optional: for accuracy evaluation)
+
+ 1. Download `CelebA-Test (LQ)` and `CelebA-Test (HQ)` from the [GFP-GAN homepage](https://xinntao.github.io/projects/gfpgan)
+ 2. Organize the dataset directory as follows (a sanity-check sketch follows the listing):
+
+ ```Plain
+ └── datasets
+     ├── celeba_512_validation
+     │   ├── 00000000.png
+     │   └── ...
+     └── celeba_512_validation_lq
+         ├── 00000000.png
+         └── ...
+ ```
+
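+ As a quick sanity check, the minimal sketch below (paths as in the layout above) verifies that every HQ image has an LQ counterpart. `onnx_eval.py` pairs the two sets by filename stem, also accepting LQ names that extend the HQ stem as a prefix, so exact-stem mismatches reported here are not necessarily fatal:
+
+ ```python
+ from pathlib import Path
+
+ hq_dir = Path("datasets/celeba_512_validation")
+ lq_dir = Path("datasets/celeba_512_validation_lq")
+
+ hq_stems = {p.stem for p in hq_dir.glob("*.png")}
+ lq_stems = {p.stem for p in lq_dir.glob("*.png")}
+
+ # exact-stem matches only; the eval script additionally accepts prefix matches
+ unmatched = sorted(hq_stems - lq_stems)
+ print(f"{len(hq_stems)} HQ / {len(lq_stems)} LQ images, {len(unmatched)} HQ without an exact LQ match")
+ ```
+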
+ ### Test & Evaluation
+
+ - Run inference on images
+
+ ```bash
+ python onnx_inference.py --onnx psfrgan_nchw_fp32.onnx --latent latent.npy --input /Path/To/Image --out-dir outputs
+ python onnx_inference.py --onnx psfrgan_nhwc_int8.onnx --latent latent.npy --input /Path/To/Image --out-dir outputs
+ ```
+
+ **Arguments:**
+
+ - `--input`: Accepts either a single image file path or a directory path. If it's a file, the script will process that image only. If it's a directory, the script will recursively scan for `.png`, `.jpg`, and `.jpeg` files and process all of them.
+ - `--latent`: (Optional) Path to the latent code file (`.npy`). If not provided, random latent values will be generated with a fixed seed for reproducibility.
+ - `--out-dir`: Output directory where the restored images will be saved.
+
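+ Both scripts wrap the bundled `OnnxRunner` class (`onnx_runner.py`), which you can also call directly; a minimal sketch (the image path is illustrative):
+
+ ```python
+ import cv2
+ from onnx_runner import OnnxRunner
+
+ # latent_path=None falls back to a seeded random latent (see onnx_runner.py)
+ runner = OnnxRunner("psfrgan_nhwc_int8.onnx", latent_path="latent.npy")
+
+ bgr = cv2.imread("face.png", cv2.IMREAD_COLOR)  # uint8 BGR input
+ restored = runner.run(bgr)  # uint8 BGR, resized back to the input size
+ cv2.imwrite("face_restored.png", restored)
+ ```
+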
+ - Evaluate the quantized model
+
+ ```bash
+ # eval fp32
+ python onnx_eval.py \
+     --onnx psfrgan_nchw_fp32.onnx \
+     --latent latent.npy \
+     --hq-dir datasets/celeba_512_validation \
+     --lq-dir datasets/celeba_512_validation_lq \
+     --out-dir outputs/fp32 -clean
+
+ # eval int8
+ python onnx_eval.py \
+     --onnx psfrgan_nhwc_int8.onnx \
+     --latent latent.npy \
+     --hq-dir datasets/celeba_512_validation \
+     --lq-dir datasets/celeba_512_validation_lq \
+     --out-dir outputs/int8 -clean
+ ```
+
+ Each run writes a JSON summary (`eval_<model>_result.json`, containing the PSNR, MS_SSIM, and FID scores) to `--out-dir`; the `-clean` flag removes the intermediate SR images afterwards.
+
+ ### Performance
+
+ | Model          | PSNR(↑) | MS_SSIM(↑) | FID(↓) |
+ | -------------- | ------- | ---------- | ------ |
+ | PSFRGAN (fp32) | 25.27   | 0.8500     | 21.99  |
+ | PSFRGAN (int8) | 25.27   | 0.8487     | 24.34  |
+
+ ---
+
+ ```bibtex
+ @inproceedings{ChenPSFRGAN,
+   author = {Chen, Chaofeng and Li, Xiaoming and Yang, Lingbo and Lin, Xianhui and Zhang, Lei and Wong, Kwan-Yee~K.},
+   title = {Progressive Semantic-Aware Style Transformation for Blind Face Restoration},
+   booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+   year = {2021}
+ }
+ ```
latent.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6570f1486bc5366e148bc7bdbd6054bc07e54d3a575bffde060f1f36a742f2b9
+ size 1048704
onnx_eval.py ADDED
@@ -0,0 +1,206 @@
+ import sys
+ import json
+ from pathlib import Path
+
+ sys.path.insert(0, Path(__file__).parent.as_posix())
+
+
+ import cv2
+ import pyiqa
+ import torch
+ import numpy as np
+ from tqdm import tqdm
+ from onnx_runner import OnnxRunner
+
+
+ def collect_common_image_pairs(
+     lq_dir: Path, hq_dir: Path
+ ) -> tuple[list[Path], list[Path]]:
+     exts = {".png", ".jpg", ".jpeg"}
+
+     def is_img(p: Path) -> bool:
+         return p.is_file() and p.suffix.lower() in exts
+
+     hq_map = {p.stem: p for p in hq_dir.iterdir() if is_img(p)}
+     hq_names = sorted(hq_map.keys())
+
+     lq_files = [p for p in lq_dir.iterdir() if is_img(p)]
+
+     lq_paths: list[Path] = []
+     hq_paths: list[Path] = []
+     for base in hq_names:
+         # try an exact stem match first
+         best_lq = next((p for p in lq_files if p.stem == base), None)
+
+         # then fall back to a prefix match
+         if best_lq is None:
+             best_lq = next(
+                 (
+                     p
+                     for p in lq_files
+                     if p.stem.startswith(base) and len(p.stem) > len(base)
+                 ),
+                 None,
+             )
+
+         if best_lq is not None:  # matched
+             hq_paths.append(hq_map[base])
+             lq_paths.append(best_lq)
+
+     return lq_paths, hq_paths
+
+
+ def align_shape(sr_bgr: np.ndarray, hq_bgr: np.ndarray):
+     if sr_bgr.shape != hq_bgr.shape:
+         sr_bgr = cv2.resize(
+             sr_bgr,
+             (hq_bgr.shape[1], hq_bgr.shape[0]),
+             interpolation=cv2.INTER_LINEAR,
+         )
+
+     return sr_bgr, hq_bgr
+
+
+ def gen_sr_images(
+     hq_dir: Path,
+     lq_dir: Path,
+     out_dir: Path,
+     onnx_path: Path,
+     latent_path: Path,
+     max_samples: int,
+ ):
+     out_dir.mkdir(exist_ok=True, parents=True)
+
+     onnx_runner = OnnxRunner(onnx_path, latent_path)
+
+     lq_paths, hq_paths = collect_common_image_pairs(lq_dir, hq_dir)
+
+     if max_samples is not None:
+         lq_paths = lq_paths[: max(max_samples, 1)]
+         hq_paths = hq_paths[: max(max_samples, 1)]
+
+     sr_paths = []
+     for i in tqdm(range(len(lq_paths)), desc="generating"):
+         lq_img_path = lq_paths[i]
+         lq_bgr = cv2.imread(lq_img_path.as_posix(), cv2.IMREAD_COLOR)
+         assert lq_bgr is not None
+         sr_bgr = onnx_runner.run(lq_bgr)
+
+         hq_img_path = hq_paths[i]
+         hq_bgr = cv2.imread(hq_img_path.as_posix(), cv2.IMREAD_COLOR)
+
+         sr_bgr, hq_bgr = align_shape(sr_bgr, hq_bgr)
+
+         out_path = out_dir / f"{lq_img_path.stem}.png"
+         cv2.imwrite(out_path.as_posix(), sr_bgr)
+
+         sr_paths.append(out_path)
+
+     return hq_paths, sr_paths
+
+
+ def eval_metrics(
+     hq_paths: list[Path],
+     sr_paths: list[Path],
+     hq_dir: Path,
+     sr_dir: Path,
+     device: torch.device | None = None,
+ ) -> dict[str, float]:
+     assert len(hq_paths) == len(sr_paths)
+
+     device = device or (
+         torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+     )
+
+     psnr_metric = pyiqa.create_metric("psnr", device=device)  # FR: sr, ref
+     ms_ssim_metric = pyiqa.create_metric("ms_ssim", device=device)  # FR: sr, ref
+     fid_metric = pyiqa.create_metric("fid")
+
+     with torch.inference_mode():
+         psnr_vals = []
+         ms_ssim_vals = []
+         for sr_p, hq_p in zip(sr_paths, hq_paths):
+             sr_p = sr_p.as_posix()
+             hq_p = hq_p.as_posix()
+             psnr_vals.append(psnr_metric(sr_p, hq_p).detach())
+             ms_ssim_vals.append(ms_ssim_metric(sr_p, hq_p).detach())
+
+         psnr = torch.stack(psnr_vals).mean().item()
+         ms_ssim = torch.stack(ms_ssim_vals).mean().item()
+
+         fid = fid_metric(
+             sr_dir.as_posix(),
+             hq_dir.as_posix(),
+             mode="clean",
+             batch_size=1,
+             num_workers=0,
+         ).item()
+
+     return {"psnr": psnr, "ms_ssim": ms_ssim, "fid": fid}
+
+
+ def main(args):
+     onnx_path = Path(args.onnx)
+     latent_path = Path(args.latent)
+     hq_dir = Path(args.hq_dir)
+     lq_dir = Path(args.lq_dir)
+     out_dir = Path(args.out_dir)
+
+     assert onnx_path.suffix == ".onnx" and onnx_path.is_file()
+     assert latent_path.suffix == ".npy" and latent_path.is_file()
+     assert lq_dir.is_dir(), f"{lq_dir} is not a dir!"
+     assert hq_dir.is_dir(), f"{hq_dir} is not a dir!"
+
+     sr_dir = out_dir / "sr"
+     hq_paths, sr_paths = gen_sr_images(
+         hq_dir, lq_dir, sr_dir, onnx_path, latent_path, args.max_samples
+     )
+
+     scores = eval_metrics(hq_paths, sr_paths, hq_dir, sr_dir)
+
+     summary = {
+         "onnx": onnx_path.as_posix(),
+         "psnr": scores["psnr"],
+         "ms_ssim": scores["ms_ssim"],
+         "fid": scores["fid"],
+     }
+
+     out_file = out_dir / f"eval_{onnx_path.stem}_result.json"
+     with open(out_file, "w") as f:
+         json.dump(summary, f, indent=2)
+     dataset_name = hq_dir.name  # e.g. celeba_512_validation
+     print(f"summary of {dataset_name}: PSNR | MS_SSIM | FID")
+     print(
+         f"{dataset_name}: {scores['psnr']:.2f} | {scores['ms_ssim']:.4f} | {scores['fid']:.2f}"
+     )
+     print(f"result saved to {out_file}")
+
+     if args.clean:
+         import shutil
+
+         print(f"cleaning SR output dir: {sr_dir}")
+         shutil.rmtree(sr_dir.as_posix(), ignore_errors=True)
+
+
+ if __name__ == "__main__":
+     from argparse import ArgumentParser
+
+     parser = ArgumentParser()
+     parser.add_argument("--onnx", type=str, required=True)
+     parser.add_argument("--latent", type=str, required=True)
+     parser.add_argument("--hq-dir", type=str, required=True)
+     parser.add_argument("--lq-dir", type=str, required=True)
+     parser.add_argument("--out-dir", type=str, default="outputs")
+     parser.add_argument(
+         "--max-samples",
+         type=int,
+         default=None,
+         help="limit the number of samples used (debug only); None means no limit",
+     )
+     parser.add_argument(
+         "-clean",
+         action="store_true",
+         default=False,
+         help="remove the generated SR images when finished",
+     )
+     main(parser.parse_args())
onnx_inference.py ADDED
@@ -0,0 +1,53 @@
+ import sys
+ from pathlib import Path
+
+
+ sys.path.insert(0, Path(__file__).parent.as_posix())
+
+
+ import cv2
+ from onnx_runner import OnnxRunner
+
+
+ def main(args):
+     onnx_path = Path(args.onnx)
+     input_path = Path(args.input)
+     out_dir = Path(args.out_dir)
+
+     assert onnx_path.suffix == ".onnx"
+
+     if input_path.is_file():
+         input_images_path = [input_path]
+     else:
+         input_images_path = sorted(
+             [
+                 p
+                 for p in input_path.rglob("*")
+                 if p.suffix.lower() in (".png", ".jpg", ".jpeg")
+             ]
+         )
+
+     out_dir.mkdir(exist_ok=True, parents=True)
+     onnx_runner = OnnxRunner(onnx_path, args.latent)
+     for input_img_path in input_images_path:
+         input_img_path: Path
+
+         input_bgr = cv2.imread(input_img_path.as_posix(), cv2.IMREAD_COLOR)
+         assert input_bgr is not None
+         out_bgr = onnx_runner.run(input_bgr)
+
+         out_path = out_dir / f"{input_img_path.stem}.png"
+         cv2.imwrite(out_path.as_posix(), out_bgr)
+         print(f"saved {out_path}")
+
+
+ if __name__ == "__main__":
+     from argparse import ArgumentParser
+
+     parser = ArgumentParser()
+     parser.add_argument("--onnx", type=str, required=True)
+     parser.add_argument("--input", type=str, required=True)
+     parser.add_argument("--out-dir", type=str, required=True)
+     parser.add_argument("--latent", type=str, default=None)
+
+     main(parser.parse_args())
onnx_runner.py ADDED
@@ -0,0 +1,119 @@
+ from pathlib import Path
+
+ import cv2
+ import numpy as np
+ import onnxruntime as ort
+
+
+ def parse_input_shape_fmt(input_shape):
+     """Determine whether a 4-D input shape is NCHW or NHWC.
+
+     We assume the channel dimension is smaller than the spatial (h & w) dimensions.
+     """
+     assert len(input_shape) == 4
+
+     c1, c2, c3 = input_shape[1:]
+
+     if c1 < min(c2, c3):  # c1 is the channel dimension
+         return "nchw"
+     elif c3 < min(c1, c2):  # c3 is the channel dimension
+         return "nhwc"
+     else:
+         raise ValueError(f"can not parse input format for shape: {input_shape}")
+
+
+ def preprocess(img_bgr: np.ndarray, input_shape_hw: tuple[int, int]):
+     in_h, in_w = input_shape_hw
+
+     resized_bgr = cv2.resize(img_bgr, (in_w, in_h), interpolation=cv2.INTER_LINEAR)
+     resized_rgb = cv2.cvtColor(resized_bgr, cv2.COLOR_BGR2RGB)
+     normed_rgb = (resized_rgb / 255.0 - 0.5) / 0.5  # normalize 0~255 -> -1~1
+
+     return normed_rgb
+
+
+ def postprocess(pred_3d: np.ndarray, pred_fmt: str, origin_hw: tuple[int, int]):
+     de_normed_3d = (pred_3d * 0.5 + 0.5) * 255  # de-normalize -1~1 -> 0~255
+
+     if pred_fmt == "nchw":
+         hwc = np.transpose(de_normed_3d, [1, 2, 0])  # chw -> hwc
+     else:  # nhwc
+         hwc = de_normed_3d  # unchanged
+
+     pred_rgb = np.clip(hwc, 0, 255).astype(np.uint8)
+     pred_bgr = cv2.cvtColor(pred_rgb, cv2.COLOR_RGB2BGR)
+
+     if tuple(pred_bgr.shape[:2]) != tuple(origin_hw):
+         pred_bgr = cv2.resize(pred_bgr, origin_hw[::-1], interpolation=cv2.INTER_LINEAR)
+
+     return pred_bgr
+
+
+ class OnnxRunner:
+     def __init__(self, onnx_path, latent_path=None, debug=False):
+         if "CUDAExecutionProvider" in ort.get_available_providers():
+             providers = ["CUDAExecutionProvider"]
+         else:
+             providers = ["CPUExecutionProvider"]
+
+         ort_session = ort.InferenceSession(str(onnx_path), providers=providers)
+
+         input0 = ort_session.get_inputs()[0]
+         self.input_name = input0.name
+         self.input_shape = tuple(input0.shape)
+         self.input_format = parse_input_shape_fmt(input0.shape)
+         self.ort_session = ort_session
+         self.debug = debug
+
+         if self.input_format == "nchw":
+             self._in_h, self._in_w = self.input_shape[2:]
+         else:  # nhwc
+             self._in_h, self._in_w = self.input_shape[1:3]
+
+         if len(ort_session.get_inputs()) == 2:
+             latent_input = ort_session.get_inputs()[1]
+             self.latent_input_name = latent_input.name
+             if latent_path is not None and Path(latent_path).is_file():
+                 latent = np.load(str(latent_path))  # stored in nchw format
+                 latent = np.transpose(latent, [0, 2, 3, 1])  # nchw -> nhwc
+             else:
+                 rng = np.random.default_rng(seed=5122)
+                 latent = rng.standard_normal(latent_input.shape)
+             self.latent = np.float32(latent)
+         else:
+             self.latent_input_name = None
+
+         if debug:
+             self._dbg_out_dir = Path(__file__).parent / "outputs"
+             self._dbg_out_dir.mkdir(exist_ok=True, parents=True)
+
+     def run(self, original_bgr: np.ndarray) -> np.ndarray:
+         """Enhance the given uint8 BGR image and return the enhanced uint8 BGR image."""
+         assert original_bgr.dtype == np.uint8
+         assert original_bgr.ndim == 3
+         assert original_bgr.shape[2] == 3
+
+         # =====================
+         # preprocessing
+         # =====================
+         input_hwc = preprocess(original_bgr, (self._in_h, self._in_w))
+
+         # =====================
+         # inference
+         # =====================
+         if self.input_format == "nchw":
+             input_3d = np.transpose(input_hwc, [2, 0, 1])  # hwc -> chw
+         else:  # nhwc
+             input_3d = input_hwc
+
+         feed = {
+             self.input_name: np.float32(input_3d[None, ...]),
+         }
+         if self.latent_input_name is not None:
+             feed[self.latent_input_name] = self.latent
+
+         outputs = self.ort_session.run(None, feed)
+
+         pred_3d: np.ndarray = outputs[0][0]
+         enhanced_bgr = postprocess(pred_3d, self.input_format, original_bgr.shape[:2])
+
+         return enhanced_bgr
psfrgan_nchw_fp32.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a4869eef68b926f381b921d4322052ce78d98dbc9419d38cd7e21c8b757e3dc0
+ size 26298729
psfrgan_nhwc_int8.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:21a7d09b2406ddce9a4540c6088152223b25aef6af13c1b0524b7b6c757d6a78
+ size 25331858
requirements-eval.txt ADDED
@@ -0,0 +1,6 @@
+ onnxruntime==1.22
+ numpy==1.26.*
+ opencv-python==4.8.*
+ tqdm
+ torch==2.6.0
+ pyiqa @ git+https://github.com/chaofengc/IQA-PyTorch.git@e851fd62e66a97345e1281d80e8deb4ab7b93c83
requirements-infer.txt ADDED
@@ -0,0 +1,4 @@
+ onnxruntime==1.22
+ numpy==1.26.*
+ opencv-python==4.8.*
+ tqdm