uvdoc-grid-onnx / example.py
fredcallagan's picture
Upload folder using huggingface_hub
39c8284 verified
#!/usr/bin/env python3
"""
UVDoc Grid-Output Document Unwarping Example

This script demonstrates how to use the UVDoc ONNX model with grid output
for high-resolution document unwarping.

The key advantage of this grid-output model over image-output models is that
the coordinate grid can be upscaled to any resolution, preserving document
quality when applied via cv2.remap().

Usage:
    python example.py input_image.jpg output_image.jpg
    python example.py input_image.jpg output_image.jpg --model path/to/UVDoc_grid.onnx

Requirements:
    pip install onnxruntime opencv-python numpy

Optional (for automatic model download):
    pip install huggingface_hub
"""
import argparse
import sys
from pathlib import Path
import cv2
import numpy as np
# Model input dimensions (fixed for UVDoc architecture).
# preprocess_image() resizes every image to this size before inference,
# regardless of the resolution of the original photo.
MODEL_INPUT_HEIGHT = 720
MODEL_INPUT_WIDTH = 496
def load_model(model_path: str = None):
    """
    Create an ONNX Runtime inference session for the UVDoc grid model.

    When no path is given, the model file is first fetched through
    ``huggingface_hub.hf_hub_download``. If ``huggingface_hub`` cannot be
    imported, an error is printed and the process exits with status 1.

    Args:
        model_path: Location of the ONNX model file, or None to download it
            from the HuggingFace Hub.

    Returns:
        ONNX Runtime InferenceSession restricted to the CPU execution provider.
    """
    import onnxruntime as ort

    if model_path is None:
        try:
            from huggingface_hub import hf_hub_download

            print("Downloading model from HuggingFace Hub...")
            # The repo id is a placeholder: update it with the actual repo.
            model_path = hf_hub_download(
                filename="UVDoc_grid.onnx",
                repo_id="YOUR_USERNAME/uvdoc-grid-onnx",
            )
            print(f"Model downloaded to: {model_path}")
        except ImportError:
            for message in (
                "Error: huggingface_hub not installed. Install it or provide --model path.",
                " pip install huggingface_hub",
            ):
                print(message)
            sys.exit(1)

    print(f"Loading model from: {model_path}")
    cpu_only = ['CPUExecutionProvider']
    return ort.InferenceSession(model_path, providers=cpu_only)
def preprocess_image(image: np.ndarray) -> np.ndarray:
    """
    Turn a BGR image into the float tensor the UVDoc model consumes.

    The image is converted to RGB, resized to the fixed model input size,
    scaled into [0, 1] as float32, reordered from HWC to CHW and given a
    leading batch axis.

    Args:
        image: BGR image from cv2.imread()

    Returns:
        Preprocessed tensor of shape (1, 3, 720, 496)
    """
    # cv2.resize takes its target size as (width, height).
    target_size = (MODEL_INPUT_WIDTH, MODEL_INPUT_HEIGHT)
    rgb_small = cv2.resize(cv2.cvtColor(image, cv2.COLOR_BGR2RGB), target_size)

    # Scale pixel values into [0, 1].
    scaled = rgb_small.astype(np.float32) / 255.0

    # HWC -> CHW, then prepend the batch axis.
    return scaled.transpose(2, 0, 1)[np.newaxis, ...]
def apply_grid_unwarping(
    image: np.ndarray,
    grid: np.ndarray,
    interpolation: int = cv2.INTER_CUBIC
) -> np.ndarray:
    """
    Unwarp an image by sampling it through the model's coordinate grid.

    The coarse grid is bilinearly resized to the image resolution, its
    normalized coordinates are converted to pixel positions, and the image
    is resampled with cv2.remap() using replicated borders.

    Args:
        image: Original BGR image (any resolution)
        grid: Model output grid of shape (1, 2, 45, 31)
        interpolation: OpenCV interpolation method used by cv2.remap()

    Returns:
        Unwarped image at original resolution
    """
    height, width = image.shape[:2]

    # Drop the batch axis and move channels last: (2, 45, 31) -> (45, 31, 2).
    coarse = grid[0].transpose(1, 2, 0)

    # Stretch the coarse grid so there is one sampling coordinate per output pixel.
    dense = cv2.resize(coarse, (width, height), interpolation=cv2.INTER_LINEAR)

    # Channel 0 holds x (width), channel 1 holds y (height), both in [-1, 1];
    # map them onto pixel coordinates.
    norm_x, norm_y = dense[..., 0], dense[..., 1]
    pixel_x = ((norm_x + 1) / 2) * (width - 1)
    pixel_y = ((norm_y + 1) / 2) * (height - 1)

    return cv2.remap(
        image,
        pixel_x.astype(np.float32),
        pixel_y.astype(np.float32),
        interpolation=interpolation,
        borderMode=cv2.BORDER_REPLICATE,
    )
def unwarp_document(
    image_path: str,
    output_path: str,
    model_path: str = None
) -> None:
    """
    Main function to unwarp a document image.

    Loads the image, runs the UVDoc grid model on a preprocessed copy,
    applies the predicted grid to the full-resolution image and writes the
    result to disk. Progress is printed at every step. If the image cannot
    be read or the result cannot be written, an error is printed and the
    process exits with status 1.

    Args:
        image_path: Path to input warped document image
        output_path: Path to save unwarped result
        model_path: Optional path to ONNX model file; passed through to
            load_model(), which downloads the model when this is None
    """
    # Load image
    print(f"Loading image: {image_path}")
    image = cv2.imread(image_path)
    # cv2.imread signals an unreadable file by returning None, not by raising.
    if image is None:
        print(f"Error: Could not load image from {image_path}")
        sys.exit(1)
    h, w = image.shape[:2]
    print(f"Image size: {w}x{h}")

    # Load model
    session = load_model(model_path)

    # The input name is read from the session rather than hard-coded.
    input_name = session.get_inputs()[0].name
    print(f"Model input name: {input_name}")

    # Preprocess
    print("Preprocessing image...")
    input_tensor = preprocess_image(image)
    print(f"Input tensor shape: {input_tensor.shape}")

    # Run inference; the first output is the coordinate grid.
    print("Running inference...")
    result = session.run(None, {input_name: input_tensor})[0]
    print(f"Output grid shape: {result.shape}")
    print(f"Output grid range: [{result.min():.4f}, {result.max():.4f}]")

    # Apply unwarping to the original (full-resolution) image
    print("Applying grid-based unwarping...")
    unwarped = apply_grid_unwarping(image, result)

    # Save result
    print(f"Saving result to: {output_path}")
    # cv2.imwrite reports a failed write through its boolean return value;
    # without this check a failed save would still print "Done!".
    if not cv2.imwrite(output_path, unwarped):
        print(f"Error: Could not save image to {output_path}")
        sys.exit(1)
    print("Done!")
def main():
    """
    Command-line entry point.

    Parses the input path, output path and optional ``--model`` path, checks
    that the input path exists (printing an error and exiting with status 1
    otherwise), then hands everything to unwarp_document().
    """
    cli = argparse.ArgumentParser(
        description="Unwarp document images using UVDoc grid-output ONNX model",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=(
            "\nExamples:\n"
            "python example.py warped_doc.jpg unwarped_doc.jpg\n"
            "python example.py warped_doc.jpg unwarped_doc.jpg --model UVDoc_grid.onnx\n"
        ),
    )
    cli.add_argument("input", help="Path to input warped document image")
    cli.add_argument("output", help="Path to save unwarped output image")
    cli.add_argument(
        "--model", "-m",
        default=None,
        help="Path to UVDoc_grid.onnx model file (downloads from HuggingFace if not provided)",
    )
    opts = cli.parse_args()

    # Fail early, before any model loading, when the input path is missing.
    input_missing = not Path(opts.input).exists()
    if input_missing:
        print(f"Error: Input file not found: {opts.input}")
        sys.exit(1)

    unwarp_document(opts.input, opts.output, opts.model)


if __name__ == "__main__":
    main()