"""Minimal example: binarize a document image using the SBB ONNX model.

Install:
    pip install onnxruntime-gpu numpy Pillow

Usage:
    python3 sample_workflow.py input.jpg output.tif
"""
import sys

import numpy as np

MODEL = "model_convtranspose.onnx"
PATCH = 448  # the model requires fixed-size PATCH x PATCH input
BATCH = 64  # number of patches sent to the model per inference call
INPUT_NAME = "input_1"
OUTPUT_NAME = "activation_55"

# uint8 -> float32 lookup table. The division is done in float64 and then
# rounded to float32 (matches original TF model's float64->float32 rounding).
_NORMALIZE_LUT = (np.arange(256, dtype=np.float64) / 255.0).astype(np.float32)


def tile_image(img, patch=PATCH):
    """Split an (H, W, 3) uint8 image into zero-padded square tiles.

    Tiles lie on a non-overlapping grid and are ordered row by row starting
    at the top-left corner. Tiles that extend past the right or bottom edge
    of the image are filled with zeros outside the image area.

    Args:
        img: uint8 array of shape (H, W, 3).
        patch: side length of each square tile.

    Returns:
        uint8 array of shape (rows * cols, patch, patch, 3), where rows and
        cols are H and W divided by ``patch``, rounded up.
    """
    h, w = img.shape[:2]
    rows, cols = -(-h // patch), -(-w // patch)  # ceiling division
    padded = np.zeros((rows * patch, cols * patch, 3), dtype=np.uint8)
    padded[:h, :w] = img
    # (rows*patch, cols*patch, 3) -> (rows, patch, cols, patch, 3), then bring
    # the two grid axes to the front so each tile is one contiguous slab.
    grid = padded.reshape(rows, patch, cols, patch, 3).swapaxes(1, 2)
    return grid.reshape(rows * cols, patch, patch, 3)


def untile(tiles, h, w):
    """Stitch single-channel tiles back together and crop to (h, w).

    This is the inverse of ``tile_image`` for one channel: ``tiles`` must be
    in the same row-by-row order and have shape (rows * cols, patch, patch).

    Returns:
        C-contiguous array of shape (h, w) with the dtype of ``tiles``.
    """
    patch = tiles.shape[1]
    rows, cols = -(-h // patch), -(-w // patch)  # ceiling division
    grid = tiles.reshape(rows, cols, patch, patch).swapaxes(1, 2)
    full = grid.reshape(rows * patch, cols * patch)
    return np.ascontiguousarray(full[:h, :w])


def normalize(patches):
    """Convert uint8 pixel values to float32 in [0, 1] via the lookup table."""
    return _NORMALIZE_LUT[patches]


def binarize(prob):
    """Threshold a probability map into a 0/255 uint8 mask.

    A pixel is set to 255 when its probability, scaled by 255 and truncated
    to uint8, is at most 128; otherwise it is set to 0.
    """
    return np.where((prob * 255).astype(np.uint8) <= 128, 255, 0).astype(np.uint8)


def main(argv=None):
    """Binarize the input image and save it as a Group 4 compressed TIFF.

    Args:
        argv: command-line arguments without the program name
            (``[input_path, output_path]``); defaults to ``sys.argv[1:]``.
            Arguments beyond the first two are ignored.

    Returns:
        Process exit status: 0 on success, 2 when arguments are missing.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 2:
        print(f"usage: {sys.argv[0]} INPUT_IMAGE OUTPUT_TIFF", file=sys.stderr)
        return 2
    src, dst = args[0], args[1]

    # Imported here rather than at module top so the NumPy-only helpers above
    # stay importable on machines without onnxruntime or Pillow installed.
    import onnxruntime as ort
    from PIL import Image

    # Load model
    sess = ort.InferenceSession(
        MODEL, providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
    )

    # Load image; the context manager closes the underlying file promptly.
    with Image.open(src) as im:
        img = np.array(im.convert("RGB"))
    h, w = img.shape[:2]

    patches = tile_image(img)

    # Normalize, run inference and threshold one batch at a time, so only the
    # uint8 masks (not the float32 inputs/outputs) are kept for the whole image.
    masks = []
    for start in range(0, len(patches), BATCH):
        batch = normalize(patches[start:start + BATCH])
        probs = sess.run([OUTPUT_NAME], {INPUT_NAME: batch})[0]
        masks.append(binarize(probs[:, :, :, 1]))

    # Tiles do not overlap, so stitching is a plain reshape and crop.
    result = untile(np.concatenate(masks), h, w)

    # Save. Mode "L" is inferred from the 2-D uint8 array.
    Image.fromarray(result).convert("1").save(
        dst, format="TIFF", compression="group4", dpi=(300, 300)
    )
    print(f"Saved {dst}")
    return 0


if __name__ == "__main__":
    sys.exit(main())