"""
Minimal example: binarize a document image using the SBB ONNX model.

Install:
    pip install onnxruntime-gpu numpy Pillow

Usage:
    python3 sample_workflow.py input.jpg output.tif
"""

import sys

import numpy as np
import onnxruntime as ort
from PIL import Image

MODEL = "model_convtranspose.onnx"  # path of the ONNX model file to load
PATCH = 448                         # side length of the square tiles fed to the model
BATCH = 64                          # number of tiles sent to the model per run() call
INPUT_NAME = "input_1"              # name of the model input tensor
OUTPUT_NAME = "activation_55"       # name of the model output tensor that is read


def split_into_patches(img: np.ndarray, patch: int = PATCH):
    """Cut an H x W x 3 uint8 image into non-overlapping square tiles.

    Tiles are taken row by row with a stride equal to ``patch``. Tiles that
    reach past the right or bottom edge are zero-padded to full size.

    Args:
        img: Image array of shape (H, W, 3), dtype uint8.
        patch: Tile side length in pixels.

    Returns:
        A pair ``(patches, positions)``: ``patches`` is a uint8 array of
        shape (N, patch, patch, 3) and ``positions`` is a list of the N
        ``(x, y)`` top-left corners, in the same order.
    """
    h, w = img.shape[:2]
    tiles, positions = [], []
    for y in range(0, h, patch):
        for x in range(0, w, patch):
            tile = np.zeros((patch, patch, 3), dtype=np.uint8)
            ph, pw = min(patch, h - y), min(patch, w - x)
            tile[:ph, :pw] = img[y:y + ph, x:x + pw]
            tiles.append(tile)
            positions.append((x, y))
    if not tiles:
        # Keep a well-formed 4-D array even for a degenerate (empty) image.
        return np.zeros((0, patch, patch, 3), dtype=np.uint8), positions
    return np.stack(tiles), positions


def normalize_patches(patches: np.ndarray) -> np.ndarray:
    """Map uint8 pixel values to float32 values in [0, 1].

    Uses a 256-entry lookup table whose entries are computed as ``i / 255``
    in float64 and then rounded to float32, so the result is bit-identical
    to doing that conversion per pixel.

    Args:
        patches: uint8 array of any shape.

    Returns:
        float32 array of the same shape.
    """
    lut = (np.arange(256, dtype=np.float64) / 255.0).astype(np.float32)
    # uint8 values are valid indices as-is; no widened integer copy is needed.
    return lut[patches]


def run_inference(sess, patches: np.ndarray, batch_size: int = BATCH) -> np.ndarray:
    """Run the model over all tiles in batches and concatenate the outputs.

    Tiles are converted to float32 one batch at a time so that only a single
    batch of float data is alive at once, rather than the whole image.

    Args:
        sess: Object exposing ``run(output_names, feeds)`` like
            ``onnxruntime.InferenceSession``.
        patches: uint8 array of shape (N, patch, patch, 3).
        batch_size: Maximum number of tiles per ``run`` call.

    Returns:
        The per-batch outputs of ``OUTPUT_NAME`` concatenated along axis 0.
    """
    outputs = []
    for start in range(0, len(patches), batch_size):
        batch = normalize_patches(patches[start:start + batch_size])
        outputs.append(sess.run([OUTPUT_NAME], {INPUT_NAME: batch})[0])
    return np.concatenate(outputs)


def stitch_patches(output: np.ndarray, positions, h: int, w: int,
                   patch: int = PATCH) -> np.ndarray:
    """Threshold each tile's output and paste it back into a full-size image.

    For every tile, channel 1 of the model output is scaled by 255, truncated
    to uint8 and compared against 128: values <= 128 become 255, all others
    become 0. (Channel 1 is presumably the foreground/ink probability --
    confirm against the model.) The padded part of edge tiles is cropped.

    Args:
        output: Model output of shape (N, patch, patch, C) with C >= 2.
        positions: The N ``(x, y)`` top-left corners of the tiles.
        h: Height of the full image.
        w: Width of the full image.
        patch: Tile side length in pixels.

    Returns:
        uint8 array of shape (h, w) containing only 0 and 255.
    """
    result = np.zeros((h, w), dtype=np.uint8)
    for tile_out, (x, y) in zip(output, positions):
        prob = tile_out[:, :, 1]
        binary = np.where((prob * 255).astype(np.uint8) <= 128,
                          np.uint8(255), np.uint8(0))
        ah, aw = min(patch, h - y), min(patch, w - x)
        # Tiles never overlap (stride == tile size), so each pixel is written
        # exactly once and no accumulation/averaging is required.
        result[y:y + ah, x:x + aw] = binary[:ah, :aw]
    return result


def main(argv=None) -> int:
    """Binarize the image named by the first argument into a Group 4 TIFF.

    Args:
        argv: Command-line arguments without the program name; defaults to
            ``sys.argv[1:]``. The first two entries are the input image path
            and the output TIFF path.

    Returns:
        Process exit status: 0 on success, 2 when arguments are missing.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 2:
        print(f"usage: {sys.argv[0]} INPUT_IMAGE OUTPUT_TIFF", file=sys.stderr)
        return 2
    in_path, out_path = args[0], args[1]

    # Execution providers are given in priority order: CUDA first, then CPU.
    sess = ort.InferenceSession(
        MODEL, providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
    )

    # Close the source file as soon as the pixel data has been copied out.
    with Image.open(in_path) as src:
        img = np.array(src.convert("RGB"))
    h, w = img.shape[:2]

    patches, positions = split_into_patches(img)
    output = run_inference(sess, patches)
    result = stitch_patches(output, positions, h, w)

    # A 2-D uint8 array is read as an 8-bit grayscale ("L") image, so the
    # mode does not have to be passed explicitly.
    Image.fromarray(result).convert("1").save(
        out_path, format="TIFF", compression="group4", dpi=(300, 300)
    )
    print(f"Saved {out_path}")
    return 0


if __name__ == "__main__":
    sys.exit(main())