# sbb-binarization-onnx / sample_workflow.py
# nathansut1's picture
# Upload sample_workflow.py with huggingface_hub
# 1b5db69 verified
"""
Minimal example: binarize a document image using the SBB ONNX model.

Install:
    pip install onnxruntime-gpu numpy Pillow

Usage:
    python3 sample_workflow.py input.jpg output.tif
"""
import sys
import numpy as np
from PIL import Image
import onnxruntime as ort
MODEL = "model_convtranspose.onnx"
PATCH = 448  # the model requires fixed-size PATCH x PATCH input
BATCH = 64  # number of patches sent to the model per inference call

# uint8 -> float32 normalization table. The division is done in float64 and
# then rounded to float32 (matches original TF model's float64->float32
# rounding); element i equals np.float32(np.float64(i) / 255.0).
LUT = (np.arange(256, dtype=np.float64) / 255.0).astype(np.float32)


def extract_patches(img, patch=PATCH):
    """Cut *img* into non-overlapping, zero-padded square tiles.

    Args:
        img: uint8 array of shape (h, w, 3).
        patch: tile edge length in pixels.

    Returns:
        (patches, positions) where ``patches`` is a uint8 array of shape
        (n, patch, patch, 3) and ``positions`` is the list of each tile's
        top-left ``(x, y)`` corner in row-major order. Tiles that run past
        the right/bottom image border are padded with zeros.
    """
    h, w = img.shape[:2]
    positions = [(x, y) for y in range(0, h, patch) for x in range(0, w, patch)]
    patches = np.zeros((len(positions), patch, patch, 3), dtype=np.uint8)
    for tile, (x, y) in zip(patches, positions):
        ph, pw = min(patch, h - y), min(patch, w - x)
        tile[:ph, :pw] = img[y:y + ph, x:x + pw]
    return patches, positions


def binarize(prob):
    """Threshold a float score map into a 0/255 uint8 mask.

    The score is scaled by 255 and truncated to uint8 first; pixels whose
    quantized score is <= 128 become 255 and all others become 0.
    (Presumably a high score means "ink" -- confirm against the model card.)
    """
    quantized = (prob * 255).astype(np.uint8)
    return np.where(quantized <= 128, 255, 0).astype(np.uint8)


def paste_tiles(mask, scores, positions, patch=PATCH):
    """Binarize channel 1 of each tile in *scores* and write it into *mask*.

    Args:
        mask: 2-D uint8 array (h, w), modified in place.
        scores: model output of shape (n, patch, patch, channels).
        positions: top-left ``(x, y)`` of each of the n tiles.
        patch: tile edge length in pixels.

    Tiles never overlap (the stride equals the tile size), so each pixel is
    written exactly once and no averaging is needed.
    """
    h, w = mask.shape
    for tile_scores, (x, y) in zip(scores, positions):
        ah, aw = min(patch, h - y), min(patch, w - x)
        # Crop away the zero padding that was added at the image border.
        mask[y:y + ah, x:x + aw] = binarize(tile_scores[:, :, 1])[:ah, :aw]


def main(argv=None):
    """Binarize the image at argv[0] and save it as a Group 4 TIFF at argv[1].

    Args:
        argv: argument list without the program name; defaults to
            ``sys.argv[1:]``.

    Returns:
        Process exit status: 0 on success, 2 when arguments are missing.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 2:
        print(f"usage: python3 {sys.argv[0]} input.jpg output.tif", file=sys.stderr)
        return 2
    src, dst = args[0], args[1]

    # Load model (providers are listed in order of preference).
    sess = ort.InferenceSession(
        MODEL, providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
    )

    # Load image; the context manager releases the file handle promptly.
    with Image.open(src) as im:
        img = np.array(im.convert("RGB"))
    h, w = img.shape[:2]

    patches, positions = extract_patches(img)

    # Normalize, infer and stitch one batch at a time so that only a single
    # batch of float32 inputs/outputs is alive at once.
    mask = np.zeros((h, w), dtype=np.uint8)
    for start in range(0, len(patches), BATCH):
        stop = start + BATCH
        batch = LUT[patches[start:stop]]
        scores = sess.run(["activation_55"], {"input_1": batch})[0]
        paste_tiles(mask, scores, positions[start:stop])

    # Save. A 2-D uint8 array is interpreted as mode "L"; the mask only
    # contains 0 and 255, so converting to 1-bit loses nothing.
    Image.fromarray(mask).convert("1").save(
        dst, format="TIFF", compression="group4", dpi=(300, 300)
    )
    print(f"Saved {dst}")
    return 0


if __name__ == "__main__":
    sys.exit(main())