#!/usr/bin/env python3
"""
UVDoc Grid-Output Document Unwarping Example

This script demonstrates how to use the UVDoc ONNX model with grid output
for high-resolution document unwarping.

The key advantage of this grid-output model over image-output models is that
the coordinate grid can be upscaled to any resolution, preserving document
quality when applied via cv2.remap().

Usage:
    python example.py input_image.jpg output_image.jpg
    python example.py input_image.jpg output_image.jpg --model path/to/UVDoc_grid.onnx

Requirements:
    pip install onnxruntime opencv-python numpy

Optional (for automatic model download):
    pip install huggingface_hub
"""

import argparse
import sys
from pathlib import Path
from typing import Optional

import cv2
import numpy as np

# Model input dimensions (fixed for UVDoc architecture)
MODEL_INPUT_HEIGHT = 720
MODEL_INPUT_WIDTH = 496


def load_model(model_path: Optional[str] = None):
    """
    Load the ONNX model.

    Args:
        model_path: Path to the ONNX model file. If None, attempts to
            download from HuggingFace Hub.

    Returns:
        ONNX Runtime InferenceSession

    Exits with status 1 when no path is given and huggingface_hub is not
    installed. Errors raised by the download itself or by ONNX Runtime are
    not caught here.
    """
    # Imported lazily so that `--help` works without onnxruntime installed.
    import onnxruntime as ort

    if model_path is None:
        # Only the import is guarded: an ImportError raised while
        # downloading must not be misreported as "package not installed".
        try:
            from huggingface_hub import hf_hub_download
        except ImportError:
            print(
                "Error: huggingface_hub not installed. Install it or provide --model path.",
                file=sys.stderr,
            )
            print("  pip install huggingface_hub", file=sys.stderr)
            sys.exit(1)

        print("Downloading model from HuggingFace Hub...")
        model_path = hf_hub_download(
            repo_id="YOUR_USERNAME/uvdoc-grid-onnx",  # Update with actual repo
            filename="UVDoc_grid.onnx",
        )
        print(f"Model downloaded to: {model_path}")

    print(f"Loading model from: {model_path}")
    session = ort.InferenceSession(
        model_path,
        providers=['CPUExecutionProvider'],
    )
    return session


def preprocess_image(image: np.ndarray) -> np.ndarray:
    """
    Preprocess image for UVDoc model input.

    Args:
        image: 8-bit BGR image from cv2.imread(). Single-channel (grayscale)
            and 4-channel (BGRA) arrays are also accepted and converted to
            RGB.

    Returns:
        Preprocessed float32 tensor of shape
        (1, 3, MODEL_INPUT_HEIGHT, MODEL_INPUT_WIDTH), i.e. (1, 3, 720, 496),
        with values scaled to [0, 1].
    """
    # Pick the colour conversion from the channel layout so that grayscale
    # and BGRA inputs still yield a 3-channel RGB tensor.
    if image.ndim == 2 or image.shape[2] == 1:
        conversion = cv2.COLOR_GRAY2RGB
    elif image.shape[2] == 4:
        conversion = cv2.COLOR_BGRA2RGB
    else:
        conversion = cv2.COLOR_BGR2RGB
    img_rgb = cv2.cvtColor(image, conversion)

    # Resize to model input size (cv2 takes the size as (width, height))
    resized = cv2.resize(img_rgb, (MODEL_INPUT_WIDTH, MODEL_INPUT_HEIGHT))

    # Normalize to [0, 1]
    normalized = resized.astype(np.float32) / 255.0

    # Convert HWC to CHW format
    transposed = np.transpose(normalized, (2, 0, 1))

    # Add batch dimension
    batched = np.expand_dims(transposed, axis=0)

    return batched


def _resample_linear(values: np.ndarray, out_len: int, axis: int) -> np.ndarray:
    """Linearly resample a float array along one axis, endpoints to endpoints.

    The first and last output samples coincide with the first and last input
    samples; everything in between is linearly interpolated.
    """
    in_len = values.shape[axis]

    # Fractional source position of every output sample.
    positions = np.linspace(0.0, in_len - 1, num=out_len)
    lower = np.floor(positions).astype(np.intp)
    upper = np.minimum(lower + 1, in_len - 1)

    # Shape the weights so they broadcast along the resampled axis only.
    weight_shape = [1] * values.ndim
    weight_shape[axis] = out_len
    weights = (positions - lower).astype(values.dtype).reshape(weight_shape)

    # np.take returns fresh arrays, so the in-place arithmetic below is safe
    # and keeps the number of full-size temporaries at two.
    base = np.take(values, lower, axis=axis)
    result = np.take(values, upper, axis=axis)
    result -= base
    result *= weights
    result += base
    return result


def _upscale_coordinate_map(coords: np.ndarray, out_h: int, out_w: int) -> np.ndarray:
    """Bilinearly upscale a 2-D float map to (out_h, out_w), corners aligned."""
    # Resample the short axis first so the intermediate stays small.
    rows = _resample_linear(coords, out_h, axis=0)
    return _resample_linear(rows, out_w, axis=1)


def apply_grid_unwarping(
    image: np.ndarray,
    grid: np.ndarray,
    interpolation: int = cv2.INTER_CUBIC
) -> np.ndarray:
    """
    Apply the coordinate grid to unwarp the image.

    Args:
        image: Original BGR image (any resolution)
        grid: Model output grid of shape (N, 2, grid_h, grid_w), e.g.
            (1, 2, 45, 31). Only the first batch entry is used. Channel 0
            holds x and channel 1 holds y, both normalized to [-1, 1].
        interpolation: OpenCV interpolation method used by cv2.remap()

    Returns:
        Unwarped image at original resolution

    Raises:
        ValueError: If grid is not 4-dimensional with at least two channels.
    """
    h_orig, w_orig = image.shape[:2]

    grid = np.asarray(grid)
    if grid.ndim != 4 or grid.shape[1] < 2:
        raise ValueError(
            f"Expected grid of shape (N, 2, H, W), got {grid.shape}"
        )

    # Drop the batch dimension; cv2.remap() needs float32 maps.
    coords = np.asarray(grid[0, :2], dtype=np.float32)

    # Convert normalized coordinates [-1, 1] to pixel coordinates on the
    # small grid. Linear interpolation commutes with this affine map, so
    # converting before upscaling gives the same result with fewer
    # full-resolution temporaries.
    # Grid channel 0 = x (width), channel 1 = y (height)
    pixel_x = (coords[0] + 1) / 2 * (w_orig - 1)
    pixel_y = (coords[1] + 1) / 2 * (h_orig - 1)

    # Upscale to the original image resolution with the grid corners pinned
    # to the image corners. This matches the [-1, 1] -> [0, size - 1]
    # convention used just above. cv2.resize() samples at pixel centres
    # instead, which would flatten roughly half a grid cell along every
    # border into a constant mapping and smear the document edges.
    map_x = _upscale_coordinate_map(pixel_x, h_orig, w_orig)
    map_y = _upscale_coordinate_map(pixel_y, h_orig, w_orig)

    # Apply remapping
    unwarped = cv2.remap(
        image,
        map_x,
        map_y,
        interpolation=interpolation,
        borderMode=cv2.BORDER_REPLICATE,
    )

    return unwarped


def unwarp_document(
    image_path: str,
    output_path: str,
    model_path: Optional[str] = None
) -> None:
    """
    Main function to unwarp a document image.

    Exits with status 1 if the input image cannot be read or the result
    cannot be written.

    Args:
        image_path: Path to input warped document image
        output_path: Path to save unwarped result
        model_path: Optional path to ONNX model file
    """
    # Load image
    print(f"Loading image: {image_path}")
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Could not load image from {image_path}", file=sys.stderr)
        sys.exit(1)

    h, w = image.shape[:2]
    print(f"Image size: {w}x{h}")

    # Load model
    session = load_model(model_path)

    # Get input name
    input_name = session.get_inputs()[0].name
    print(f"Model input name: {input_name}")

    # Preprocess
    print("Preprocessing image...")
    input_tensor = preprocess_image(image)
    print(f"Input tensor shape: {input_tensor.shape}")

    # Run inference
    print("Running inference...")
    result = session.run(None, {input_name: input_tensor})[0]
    print(f"Output grid shape: {result.shape}")
    print(f"Output grid range: [{result.min():.4f}, {result.max():.4f}]")

    # Apply unwarping
    print("Applying grid-based unwarping...")
    unwarped = apply_grid_unwarping(image, result)

    # Save result. cv2.imwrite() reports some failures by returning False
    # and others (e.g. an unknown extension) by raising cv2.error; neither
    # may be mistaken for success.
    print(f"Saving result to: {output_path}")
    try:
        saved = cv2.imwrite(output_path, unwarped)
    except cv2.error as exc:
        print(
            f"Error: Could not save image to {output_path}: {exc}",
            file=sys.stderr,
        )
        sys.exit(1)
    if not saved:
        print(f"Error: Could not save image to {output_path}", file=sys.stderr)
        sys.exit(1)

    print("Done!")


def main():
    """Parse command-line arguments and run the unwarping pipeline."""
    parser = argparse.ArgumentParser(
        description="Unwarp document images using UVDoc grid-output ONNX model",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
    python example.py warped_doc.jpg unwarped_doc.jpg
    python example.py warped_doc.jpg unwarped_doc.jpg --model UVDoc_grid.onnx
        """
    )
    parser.add_argument(
        "input",
        help="Path to input warped document image"
    )
    parser.add_argument(
        "output",
        help="Path to save unwarped output image"
    )
    parser.add_argument(
        "--model", "-m",
        default=None,
        help="Path to UVDoc_grid.onnx model file (downloads from HuggingFace if not provided)"
    )

    args = parser.parse_args()

    # Validate that the input is an existing regular file (a directory would
    # pass a bare exists() check and only fail later inside cv2.imread()).
    if not Path(args.input).is_file():
        print(f"Error: Input file not found: {args.input}", file=sys.stderr)
        sys.exit(1)

    unwarp_document(args.input, args.output, args.model)


if __name__ == "__main__":
    main()