jounery-d committed
Commit cfa21ff · verified · 1 parent: 0e336ee

first commit

.gitattributes CHANGED
@@ -33,3 +33,11 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ model/realesrgan-x2.axmodel filter=lfs diff=lfs merge=lfs -text
+ model/realesrgan-x4.axmodel filter=lfs diff=lfs merge=lfs -text
+ pics/00003.png filter=lfs diff=lfs merge=lfs -text
+ pics/children-alpha.png filter=lfs diff=lfs merge=lfs -text
+ pics/OST_009.png filter=lfs diff=lfs merge=lfs -text
+ pics/tree_alpha_16bit.png filter=lfs diff=lfs merge=lfs -text
+ results/1.png filter=lfs diff=lfs merge=lfs -text
+ results/2.png filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,120 @@
+ ---
+ license: mit
+ language:
+ - en
+ base_model:
+ - Real-ESRGAN
+ pipeline_tag: image-to-image
+ tags:
+ - Image
+ - SuperResolution
+ ---
+
+ # Real-ESRGAN
+
+ This version of Real-ESRGAN has been converted to run on the Axera NPU using **w8a8** quantization.
+
+ Compatible with Pulsar2 version: 4.2
+
+ ## Conversion tool links
+
+ If you are interested in model conversion, you can export an axmodel yourself using:
+
+ - [the AXera Platform samples repo](https://github.com/AXERA-TECH/ax-samples), which contains a detailed guide
+
+ - [the Pulsar2 documentation on converting ONNX models to axmodel](https://pulsar2-docs.readthedocs.io/en/latest/pulsar2/introduction.html)
+
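+ The quantization settings used for this conversion are in `build_config.json` (included in this repository): w8a8 with MinMax calibration over a 10-sample `npy.zip` dataset in Numpy format. How that archive is built depends on your export pipeline; below is a minimal sketch, assuming the calibration tensors use the same preprocessing as `run_onnx.py` (float32 RGB in [0, 1], NCHW, 128x128 tiles). The folder and file names are placeholders.
+
+ ```python
+ # Hypothetical helper: pack ten preprocessed 128x128 tiles from ./pics into npy.zip
+ # for Pulsar2 calibration. Whether Pulsar2 expects the leading batch dimension should
+ # be checked against the Pulsar2 docs; this sketch keeps it (1x3x128x128).
+ import glob
+ import os
+ import zipfile
+
+ import cv2
+ import numpy as np
+
+ os.makedirs('calib', exist_ok=True)
+ with zipfile.ZipFile('npy.zip', 'w') as zf:
+     for i, path in enumerate(sorted(glob.glob('pics/*'))[:10]):
+         img = cv2.imread(path)                                     # HWC, BGR, uint8
+         if img is None:
+             continue
+         img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
+         tile = img[:128, :128]                                     # assumes images are >= 128x128
+         tensor = np.expand_dims(np.transpose(tile, (2, 0, 1)), 0)  # 1x3x128x128
+         npy_path = os.path.join('calib', f'{i}.npy')
+         np.save(npy_path, tensor)
+         zf.write(npy_path, arcname=f'{i}.npy')
+ ```
+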
+ ## Supported platforms
+
+ - AX650
+   - [M4N-Dock (爱芯派Pro)](https://wiki.sipeed.com/hardware/zh/maixIV/m4ndock/m4ndock.html)
+   - [M.2 Accelerator card](https://axcl-docs.readthedocs.io/zh-cn/latest/doc_guide_hardware.html)
+ - AX630C
+   - [爱芯派2](https://axera-pi-2-docs-cn.readthedocs.io/zh-cn/latest/index.html)
+   - [Module-LLM](https://docs.m5stack.com/zh_CN/module/Module-LLM)
+   - [LLM630 Compute Kit](https://docs.m5stack.com/zh_CN/core/LLM630%20Compute%20Kit)
+
+ |Chip|Model|Inference time|
+ |--|--|--|
+ |AX650|realesrgan-x2|15.6 ms|
+ |AX650|realesrgan-x4|62.1 ms|
+
+ ## How to use
+
+ Download all files from this repository to the device.
+
+ ```
+ root@ax650:~/realesrgan# tree
+ .
+ |-- model
+ |   |-- realesrgan-x2.axmodel
+ |   `-- realesrgan-x4.axmodel
+ |-- run_onnx.py
+ |-- run_axmodel.py
+ |-- build_config.json
+ `-- requirements.txt
+ ```
+
+ ### Inference
+
+ Input data: the sample images under `./pics` (e.g. `00003.png`, `OST_009.png`, `children-alpha.png`, `tree_alpha_16bit.png`).
+
+ #### Inference on an AX650 host, such as the M4N-Dock (爱芯派Pro)
+
+ ```
+ root@ax650:~/realesrgan# python3 run_axmodel.py --input ./pics --outscale 2 --model_path ./model/realesrgan-x2.axmodel
+ [INFO] Available providers: ['AxEngineExecutionProvider']
+ Testing 0 00003
+ [INFO] Using provider: AxEngineExecutionProvider
+ [INFO] Chip type: ChipType.MC50
+ [INFO] VNPU type: VNPUType.DISABLED
+ [INFO] Engine version: 2.12.0s
+ [INFO] Model type: 2 (triple core)
+ [INFO] Compiler version: 4.2-dirty 5e72cf06-dirty
+ Testing 1 00017_gray
+ [INFO] Using provider: AxEngineExecutionProvider
+ [INFO] Model type: 2 (triple core)
+ [INFO] Compiler version: 4.2-dirty 5e72cf06-dirty
+ Testing 2 0014
+ [INFO] Using provider: AxEngineExecutionProvider
+ [INFO] Model type: 2 (triple core)
+ [INFO] Compiler version: 4.2-dirty 5e72cf06-dirty
+ Testing 3 0030
+ [INFO] Using provider: AxEngineExecutionProvider
+ [INFO] Model type: 2 (triple core)
+ [INFO] Compiler version: 4.2-dirty 5e72cf06-dirty
+ Testing 4 ADE_val_00000114
+ [INFO] Using provider: AxEngineExecutionProvider
+ [INFO] Model type: 2 (triple core)
+ [INFO] Compiler version: 4.2-dirty 5e72cf06-dirty
+ Testing 5 OST_009
+ [INFO] Using provider: AxEngineExecutionProvider
+ [INFO] Model type: 2 (triple core)
+ [INFO] Compiler version: 4.2-dirty 5e72cf06-dirty
+ Testing 6 children-alpha
+ [INFO] Using provider: AxEngineExecutionProvider
+ [INFO] Model type: 2 (triple core)
+ [INFO] Compiler version: 4.2-dirty 5e72cf06-dirty
+ Testing 7 tree_alpha_16bit
+ Input is a 16-bit image
+ [INFO] Using provider: AxEngineExecutionProvider
+ [INFO] Model type: 2 (triple core)
+ [INFO] Compiler version: 4.2-dirty 5e72cf06-dirty
+ Testing 8 wolf_gray
+ [INFO] Using provider: AxEngineExecutionProvider
+ [INFO] Model type: 2 (triple core)
+ [INFO] Compiler version: 4.2-dirty 5e72cf06-dirty
+
+ ```
+
+ Output: the upscaled images are saved to the `./results` folder (this repository includes `results/1.png` and `results/2.png` as sample outputs).
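+
+ For reference, here is a minimal sketch of the single-tile inference that `run_axmodel.py` performs; the script itself additionally pads and tiles larger images and stitches the upscaled tiles back together. The input path and the 128x128 crop are illustrative only.
+
+ ```python
+ # Minimal single-tile inference sketch mirroring run_axmodel.py (illustrative paths).
+ import cv2
+ import numpy as np
+ import axengine as axe
+
+ session = axe.InferenceSession('./model/realesrgan-x2.axmodel')
+ input_name = session.get_inputs()[0].name
+ output_names = [o.name for o in session.get_outputs()]
+
+ img = cv2.imread('./pics/00003.png').astype(np.float32) / 255.0   # HWC, BGR, [0, 1]
+ img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+ tile = img[:128, :128]                                            # assumes the image is >= 128x128
+ x = np.expand_dims(np.transpose(tile, (2, 0, 1)), axis=0)         # 1x3x128x128
+
+ y = session.run(output_names, {input_name: x})[0]                 # 1x3x256x256 for the x2 model
+ out = np.transpose(y.squeeze(0)[[2, 1, 0], :, :], (1, 2, 0))      # CHW RGB -> HWC BGR
+ cv2.imwrite('tile_x2.png', np.clip(out * 255.0, 0, 255).round().astype(np.uint8))
+ ```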
build_config.json ADDED
@@ -0,0 +1,45 @@
+ {
+     "work_dir": "",
+     "model_type": "ONNX",
+     "target_hardware": "AX650",
+     "npu_mode": "NPU3",
+     "onnx_opt": {
+         "disable_onnx_optimization": false,
+         "model_check": false
+     },
+     "quant": {
+         "input_configs": [
+             {
+                 "tensor_name": "DEFAULT",
+                 "calibration_dataset": "npy.zip",
+                 "calibration_format": "Numpy",
+                 "calibration_size": 10,
+                 "calibration_mean": [0, 0, 0],
+                 "calibration_std": [1.0, 1.0, 1.0]
+             }
+         ],
+         "calibration_method": "MinMax",
+         "precision_analysis": true,
+         "precision_analysis_method": "EndToEnd",
+         "precision_analysis_mode": "Reference"
+     },
+     "input_processors": [
+         {
+             "tensor_name": "DEFAULT",
+             "tensor_format": "AutoColorSpace",
+             "tensor_layout": "NCHW",
+             "src_layout": "NCHW",
+             "src_format": "AutoColorSpace",
+             "src_dtype": "FP32"
+         }
+     ],
+     "output_processors": [
+         {
+             "tensor_name": "DEFAULT"
+         }
+     ],
+     "compiler": {
+         "check": 0
+     }
+ }
+
model/realesrgan-x2.axmodel ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:348f9e4d81072b4865cb1d96143134f0de44d3f2c750805b188a5c42ba5d633e
+ size 19270519
model/realesrgan-x4.axmodel ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:301a65723e740bbc7082b84d1622ff2555ba732baf6f19373c5b8c9e1e03fb75
+ size 19657802
pics/00003.png ADDED

Git LFS Details

  • SHA256: d37932ae7d3137a0e38f8a90f7e3e16e13353399db6e29dca5a03a350f5fed1b
  • Pointer size: 131 Bytes
  • Size of remote file: 164 kB
pics/00017_gray.png ADDED
pics/0014.jpg ADDED
pics/0030.jpg ADDED
pics/ADE_val_00000114.jpg ADDED
pics/OST_009.png ADDED

Git LFS Details

  • SHA256: 62c8ec34919070f9c6fd3398d7a863b4d214adb4822331e9d507317b683ef46d
  • Pointer size: 131 Bytes
  • Size of remote file: 718 kB
pics/children-alpha.png ADDED

Git LFS Details

  • SHA256: 17323c91483660079e2e95fce438485b8f144bbaee50b2e7b10a9c343c628589
  • Pointer size: 131 Bytes
  • Size of remote file: 275 kB
pics/tree_alpha_16bit.png ADDED

Git LFS Details

  • SHA256: e6af49641c52884f1d5af6f8afcc75fa2ee0c31fb8e60a37e907b62aeb30d660
  • Pointer size: 131 Bytes
  • Size of remote file: 382 kB
pics/wolf_gray.jpg ADDED
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ numpy
+ opencv-python
+ onnxruntime
results/1.png ADDED

Git LFS Details

  • SHA256: dcd449360c2bde7274b1a2c1005294c15b0c9b06295d8ad7d7ad0f45e78a0af1
  • Pointer size: 131 Bytes
  • Size of remote file: 296 kB
results/2.png ADDED

Git LFS Details

  • SHA256: 757a300fd65736d31d86c2b94d1282bca4f2171a69b074e89ea97f373ae55526
  • Pointer size: 131 Bytes
  • Size of remote file: 254 kB
run_axmodel.py ADDED
@@ -0,0 +1,184 @@
+ import argparse
+ import cv2
+ import glob
+ import os
+ import math
+ import numpy as np
+ import axengine as axe
+
+ def pre_process(img, tile_size=128):
+     """Pre-process (mod pad) the image so that its height and width are divisible by tile_size.
+     """
+     # mod pad for divisible borders
+     pad_h, pad_w = 0, 0
+     h, w = img.shape[0:2]
+
+     if h % tile_size != 0:
+         pad_h = (tile_size - h % tile_size)
+     if w % tile_size != 0:
+         pad_w = (tile_size - w % tile_size)
+     img = np.pad(img, ((0, pad_h), (0, pad_w), (0, 0)), mode='reflect')
+     # HWC -> NCHW
+     img = np.expand_dims(np.transpose(img, (2, 0, 1)), axis=0)
+
+     return img
+
+ def tile_process(img, origin_shape, model, scale=2, tile_size=64):
+     """First crop the input image into tiles, then process each tile.
+     Finally, all processed tiles are merged into one image.
+     """
+
+     # check the model path
+     if not os.path.exists(model):
+         raise ValueError(f'Model {model} does not exist.')
+
+     session = axe.InferenceSession(model)
+     input_name = session.get_inputs()[0].name
+     output_names = [x.name for x in session.get_outputs()]
+
+     # tile
+     batch, channel, height, width = img.shape
+     output_height = int(round(height * scale))
+     output_width = int(round(width * scale))
+     output_shape = (batch, channel, output_height, output_width)
+
+     # start with a black image
+     output = np.zeros(output_shape)
+     tiles_x = math.ceil(width / tile_size)
+     tiles_y = math.ceil(height / tile_size)
+
+     # loop over all tiles
+     for y in range(tiles_y):
+         for x in range(tiles_x):
+             # extract tile from input image
+             ofs_x = x * tile_size
+             ofs_y = y * tile_size
+             # input tile area on the total image
+             input_start_x = ofs_x
+             input_end_x = min(ofs_x + tile_size, width)
+             input_start_y = ofs_y
+             input_end_y = min(ofs_y + tile_size, height)
+
+             # input tile dimensions
+             tile_idx = y * tiles_x + x + 1
+             input_tile = img[:, :, input_start_y:input_end_y, input_start_x:input_end_x]
+
+             # upscale tile
+             try:
+                 output_tile = session.run(output_names, {input_name: input_tile})
+             except RuntimeError as error:
+                 print('Error', error)
+             # print(f'\tTile {tile_idx}/{tiles_x * tiles_y}')
+
+             # output tile area on the total image
+             output_start_x = int(round(input_start_x * scale))
+             output_end_x = int(round(input_end_x * scale))
+             output_start_y = int(round(input_start_y * scale))
+             output_end_y = int(round(input_end_y * scale))
+             output[:, :, output_start_y:output_end_y, output_start_x:output_end_x] = output_tile[0]
+
+     # remove the extra padded borders
+     origin_h, origin_w = origin_shape[0:2]
+     output = output[:, :, :int(round(origin_h * scale)), :int(round(origin_w * scale))].squeeze(0)
+     # CHW RGB -> HWC BGR
+     output = np.transpose(output[[2, 1, 0], :, :], (1, 2, 0)).astype(np.float32)
+
+     return output
+
+ def main():
+     """Inference demo for Real-ESRGAN.
+     """
+     parser = argparse.ArgumentParser()
+     parser.add_argument('-i', '--input', type=str, default='inputs', help='Input image or folder')
+     parser.add_argument('-o', '--output', type=str, default='results', help='Output folder')
+     parser.add_argument('-s', '--outscale', type=float, default=2, help='The final upsampling scale of the image [Options: 2, 4]')
+     parser.add_argument(
+         '--model_path', type=str, default=None, help='Model path; you need to specify it')
+     parser.add_argument('--suffix', type=str, default='out', help='Suffix of the restored image')
+     parser.add_argument('-t', '--tile', type=int, default=128, help='Tile size, 0 for no tile during testing')
+     parser.add_argument(
+         '--ext',
+         type=str,
+         default='auto',
+         help='Image extension. Options: auto | jpg | png, auto means using the same extension as inputs')
+
+     args = parser.parse_args()
+
+     # input
+     if os.path.isfile(args.input):
+         paths = [args.input]
+     else:
+         paths = sorted(glob.glob(os.path.join(args.input, '*')))
+
+     # output
+     os.makedirs(args.output, exist_ok=True)
+
+     for idx, path in enumerate(paths):
+         imgname, extension = os.path.splitext(os.path.basename(path))
+         print('Testing', idx, imgname)
+         if extension not in ['.jpg', '.jpeg', '.png', '.tif', '.tiff', '.bmp', '.webp']:
+             continue
+
+         img = cv2.imread(path, cv2.IMREAD_UNCHANGED)
+         if img is None:
+             print('Error loading image')
+             continue
+         img = img.astype(np.float32)
+         if np.max(img) > 256:  # 16-bit image
+             max_range = 65535
+             print('\tInput is a 16-bit image')
+         else:
+             max_range = 255
+         img = img / max_range
+         if len(img.shape) == 2:  # gray image
+             img_mode = 'L'
+             img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
+         elif img.shape[2] == 4:  # RGBA image with alpha channel
+             img_mode = 'RGBA'
+             alpha = img[:, :, 3]
+             img = img[:, :, 0:3]
+             img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+         else:
+             img_mode = 'RGB'
+             img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+
+         # pre-process
+         origin_shape = img.shape
+         img = pre_process(img, args.tile)
+
+         # tile process
+         try:
+             output_img = tile_process(img, origin_shape, args.model_path, args.outscale, args.tile)
+         except RuntimeError as error:
+             print('Error', error)
+             print('If you encounter an out-of-memory error, try to set --tile to a smaller number.')
+             continue  # skip this image if inference failed
+
+         if img_mode == 'L':
+             output_img = cv2.cvtColor(output_img, cv2.COLOR_BGR2GRAY)
+         if img_mode == 'RGBA':
+             h, w = alpha.shape[0:2]
+             output_alpha = cv2.resize(
+                 alpha,
+                 (int(round(w * args.outscale)),
+                  int(round(h * args.outscale))),
+                 interpolation=cv2.INTER_LINEAR
+             )
+             output_img = cv2.cvtColor(output_img, cv2.COLOR_BGR2BGRA)
+             output_img[:, :, 3] = output_alpha
+
+         if max_range == 65535:  # 16-bit image
+             output = np.clip((output_img * 65535.0), 0, 65535).astype(np.uint16)
+         else:
+             output = np.clip((output_img * 255.0), 0, 255).round().astype(np.uint8)
+
+         if args.ext == 'auto':
+             extension = extension[1:]
+         else:
+             extension = args.ext
+
+         if args.suffix == '':
+             save_path = os.path.join(args.output, f'{imgname}.{extension}')
+         else:
+             save_path = os.path.join(args.output, f'{imgname}_{args.suffix}.{extension}')
+         cv2.imwrite(save_path, output)
+
+ if __name__ == '__main__':
+     main()
run_onnx.py ADDED
@@ -0,0 +1,184 @@
+ import argparse
+ import cv2
+ import glob
+ import os
+ import math
+ import numpy as np
+ import onnxruntime as ort
+
+ def pre_process(img, tile_size=128):
+     """Pre-process (mod pad) the image so that its height and width are divisible by tile_size.
+     """
+     # mod pad for divisible borders
+     pad_h, pad_w = 0, 0
+     h, w = img.shape[0:2]
+
+     if h % tile_size != 0:
+         pad_h = (tile_size - h % tile_size)
+     if w % tile_size != 0:
+         pad_w = (tile_size - w % tile_size)
+     img = np.pad(img, ((0, pad_h), (0, pad_w), (0, 0)), mode='reflect')
+     # HWC -> NCHW
+     img = np.expand_dims(np.transpose(img, (2, 0, 1)), axis=0)
+
+     return img
+
+ def tile_process(img, origin_shape, model, scale=2, tile_size=64):
+     """First crop the input image into tiles, then process each tile.
+     Finally, all processed tiles are merged into one image.
+     """
+
+     # check the model path
+     if not os.path.exists(model):
+         raise ValueError(f'Model {model} does not exist.')
+
+     session = ort.InferenceSession(model)
+     input_name = session.get_inputs()[0].name
+     output_names = [x.name for x in session.get_outputs()]
+
+     # tile
+     batch, channel, height, width = img.shape
+     output_height = int(round(height * scale))
+     output_width = int(round(width * scale))
+     output_shape = (batch, channel, output_height, output_width)
+
+     # start with a black image
+     output = np.zeros(output_shape)
+     tiles_x = math.ceil(width / tile_size)
+     tiles_y = math.ceil(height / tile_size)
+
+     # loop over all tiles
+     for y in range(tiles_y):
+         for x in range(tiles_x):
+             # extract tile from input image
+             ofs_x = x * tile_size
+             ofs_y = y * tile_size
+             # input tile area on the total image
+             input_start_x = ofs_x
+             input_end_x = min(ofs_x + tile_size, width)
+             input_start_y = ofs_y
+             input_end_y = min(ofs_y + tile_size, height)
+
+             # input tile dimensions
+             tile_idx = y * tiles_x + x + 1
+             input_tile = img[:, :, input_start_y:input_end_y, input_start_x:input_end_x]
+
+             # upscale tile
+             try:
+                 output_tile = session.run(output_names, {input_name: input_tile})
+             except RuntimeError as error:
+                 print('Error', error)
+             # print(f'\tTile {tile_idx}/{tiles_x * tiles_y}')
+
+             # output tile area on the total image
+             output_start_x = int(round(input_start_x * scale))
+             output_end_x = int(round(input_end_x * scale))
+             output_start_y = int(round(input_start_y * scale))
+             output_end_y = int(round(input_end_y * scale))
+             output[:, :, output_start_y:output_end_y, output_start_x:output_end_x] = output_tile[0]
+
+     # remove the extra padded borders
+     origin_h, origin_w = origin_shape[0:2]
+     output = output[:, :, :int(round(origin_h * scale)), :int(round(origin_w * scale))].squeeze(0)
+     # CHW RGB -> HWC BGR
+     output = np.transpose(output[[2, 1, 0], :, :], (1, 2, 0)).astype(np.float32)
+
+     return output
+
+ def main():
+     """Inference demo for Real-ESRGAN.
+     """
+     parser = argparse.ArgumentParser()
+     parser.add_argument('-i', '--input', type=str, default='inputs', help='Input image or folder')
+     parser.add_argument('-o', '--output', type=str, default='results', help='Output folder')
+     parser.add_argument('-s', '--outscale', type=float, default=2, help='The final upsampling scale of the image [Options: 2, 4]')
+     parser.add_argument(
+         '--model_path', type=str, default=None, help='Model path; you need to specify it')
+     parser.add_argument('--suffix', type=str, default='out', help='Suffix of the restored image')
+     parser.add_argument('-t', '--tile', type=int, default=128, help='Tile size, 0 for no tile during testing')
+     parser.add_argument(
+         '--ext',
+         type=str,
+         default='auto',
+         help='Image extension. Options: auto | jpg | png, auto means using the same extension as inputs')
+
+     args = parser.parse_args()
+
+     # input
+     if os.path.isfile(args.input):
+         paths = [args.input]
+     else:
+         paths = sorted(glob.glob(os.path.join(args.input, '*')))
+
+     # output
+     os.makedirs(args.output, exist_ok=True)
+
+     for idx, path in enumerate(paths):
+         imgname, extension = os.path.splitext(os.path.basename(path))
+         print('Testing', idx, imgname)
+         if extension not in ['.jpg', '.jpeg', '.png', '.tif', '.tiff', '.bmp', '.webp']:
+             continue
+
+         img = cv2.imread(path, cv2.IMREAD_UNCHANGED)
+         if img is None:
+             print('Error loading image')
+             continue
+         img = img.astype(np.float32)
+         if np.max(img) > 256:  # 16-bit image
+             max_range = 65535
+             print('\tInput is a 16-bit image')
+         else:
+             max_range = 255
+         img = img / max_range
+         if len(img.shape) == 2:  # gray image
+             img_mode = 'L'
+             img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
+         elif img.shape[2] == 4:  # RGBA image with alpha channel
+             img_mode = 'RGBA'
+             alpha = img[:, :, 3]
+             img = img[:, :, 0:3]
+             img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+         else:
+             img_mode = 'RGB'
+             img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+
+         # pre-process
+         origin_shape = img.shape
+         img = pre_process(img, args.tile)
+
+         # tile process
+         try:
+             output_img = tile_process(img, origin_shape, args.model_path, args.outscale, args.tile)
+         except RuntimeError as error:
+             print('Error', error)
+             print('If you encounter an out-of-memory error, try to set --tile to a smaller number.')
+             continue  # skip this image if inference failed
+
+         if img_mode == 'L':
+             output_img = cv2.cvtColor(output_img, cv2.COLOR_BGR2GRAY)
+         if img_mode == 'RGBA':
+             h, w = alpha.shape[0:2]
+             output_alpha = cv2.resize(
+                 alpha,
+                 (int(round(w * args.outscale)),
+                  int(round(h * args.outscale))),
+                 interpolation=cv2.INTER_LINEAR
+             )
+             output_img = cv2.cvtColor(output_img, cv2.COLOR_BGR2BGRA)
+             output_img[:, :, 3] = output_alpha
+
+         if max_range == 65535:  # 16-bit image
+             output = np.clip((output_img * 65535.0), 0, 65535).astype(np.uint16)
+         else:
+             output = np.clip((output_img * 255.0), 0, 255).round().astype(np.uint8)
+
+         if args.ext == 'auto':
+             extension = extension[1:]
+         else:
+             extension = args.ext
+
+         if args.suffix == '':
+             save_path = os.path.join(args.output, f'{imgname}.{extension}')
+         else:
+             save_path = os.path.join(args.output, f'{imgname}_{args.suffix}.{extension}')
+         cv2.imwrite(save_path, output)
+
+ if __name__ == '__main__':
+     main()
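
A quick way to gauge the impact of the w8a8 quantization is to run the same image through both `run_onnx.py` (which needs the original float ONNX model, not included in this repository) and `run_axmodel.py`, then compare the two saved results. A minimal sketch, with placeholder paths for wherever each script wrote its output:

```python
# Hypothetical PSNR check between the ONNX (float) and axmodel (w8a8) outputs.
# Both paths are placeholders; point them at the PNGs written by run_onnx.py and run_axmodel.py.
import cv2
import numpy as np

ref = cv2.imread('results_onnx/00003_out.png').astype(np.float64)
quant = cv2.imread('results_axmodel/00003_out.png').astype(np.float64)

mse = np.mean((ref - quant) ** 2)
psnr = float('inf') if mse == 0 else 10 * np.log10(255.0 ** 2 / mse)
print(f'MSE: {mse:.3f}  PSNR: {psnr:.2f} dB')
```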