uvdoc-grid-onnx / example.py

Upload folder using huggingface_hub

39c8284 verified about 21 hours ago

6.28 kB

	#!/usr/bin/env python3
	"""
	UVDoc Grid-Output Document Unwarping Example

	This script demonstrates how to use the UVDoc ONNX model with grid output
	for high-resolution document unwarping.

	The key advantage of this grid-output model over image-output models is that
	the coordinate grid can be upscaled to any resolution, preserving document
	quality when applied via cv2.remap().

	Usage:
	python example.py input_image.jpg output_image.jpg
	python example.py input_image.jpg output_image.jpg --model path/to/UVDoc_grid.onnx

	Requirements:
	pip install onnxruntime opencv-python numpy

	Optional (for automatic model download):
	pip install huggingface_hub
	"""

	import argparse
	import sys
	from pathlib import Path
	from typing import Optional

	import cv2
	import numpy as np

	# Model input dimensions (fixed for UVDoc architecture), in pixels.
	# preprocess_image() resizes every image to this size before inference;
	# note that cv2.resize takes its target size as (width, height).
	MODEL_INPUT_HEIGHT = 720
	MODEL_INPUT_WIDTH = 496


	def load_model(model_path: Optional[str] = None):
	    """
	    Load the UVDoc grid-output ONNX model into an ONNX Runtime session.

	    Args:
	        model_path: Path to the ONNX model file. If None, the model is
	            downloaded from the HuggingFace Hub, which requires the
	            optional huggingface_hub package.

	    Returns:
	        ONNX Runtime InferenceSession using the CPU execution provider.

	    Note:
	        Terminates the process with exit code 1 when model_path is None
	        and huggingface_hub cannot be imported.
	    """
	    import onnxruntime as ort

	    if model_path is None:
	        # Guard only the import. An ImportError raised later, from inside
	        # the download itself, must propagate instead of being misreported
	        # as "huggingface_hub not installed".
	        try:
	            from huggingface_hub import hf_hub_download
	        except ImportError:
	            print("Error: huggingface_hub not installed. Install it or provide --model path.")
	            print(" pip install huggingface_hub")
	            sys.exit(1)

	        print("Downloading model from HuggingFace Hub...")
	        model_path = hf_hub_download(
	            repo_id="YOUR_USERNAME/uvdoc-grid-onnx",  # Update with actual repo
	            filename="UVDoc_grid.onnx",
	        )
	        print(f"Model downloaded to: {model_path}")

	    print(f"Loading model from: {model_path}")
	    session = ort.InferenceSession(
	        model_path,
	        providers=['CPUExecutionProvider'],
	    )

	    return session


	def preprocess_image(image: np.ndarray) -> np.ndarray:
	    """
	    Convert a BGR image into the batched float32 tensor fed to the model.

	    Args:
	        image: BGR image from cv2.imread()

	    Returns:
	        Preprocessed tensor of shape (1, 3, 720, 496)
	    """
	    # cv2.resize expects the target size as (width, height)
	    target_size = (MODEL_INPUT_WIDTH, MODEL_INPUT_HEIGHT)

	    # BGR -> RGB, then shrink/stretch to the fixed model resolution
	    rgb_small = cv2.resize(cv2.cvtColor(image, cv2.COLOR_BGR2RGB), target_size)

	    # Scale to [0, 1] as float32 and reorder HWC -> CHW
	    chw = (rgb_small.astype(np.float32) / 255.0).transpose(2, 0, 1)

	    # Prepend the batch axis
	    return chw[np.newaxis, ...]


	def _upsample_align_corners(plane: np.ndarray, out_h: int, out_w: int) -> np.ndarray:
	    """Bilinearly upsample a 2-D array so its corner samples land on the output corners."""
	    plane = np.asarray(plane, dtype=np.float32)
	    src_h, src_w = plane.shape

	    # Fractional source coordinate of every output row / column. The end
	    # points are exactly 0 and (size - 1), i.e. the corners are aligned.
	    ys = np.linspace(0.0, src_h - 1, out_h)
	    xs = np.linspace(0.0, src_w - 1, out_w)

	    # Lower neighbour is clamped so that the upper neighbour stays in range.
	    y0 = np.clip(np.floor(ys).astype(np.intp), 0, max(src_h - 2, 0))
	    x0 = np.clip(np.floor(xs).astype(np.intp), 0, max(src_w - 2, 0))
	    y1 = np.minimum(y0 + 1, src_h - 1)
	    x1 = np.minimum(x0 + 1, src_w - 1)

	    wy = (ys - y0).astype(np.float32)[:, np.newaxis]
	    wx = (xs - x0).astype(np.float32)[np.newaxis, :]

	    # Separable interpolation: along rows first (still only src_w wide),
	    # then along columns to reach the full output width.
	    rows = plane[y0] * (1.0 - wy) + plane[y1] * wy
	    return rows[:, x0] * (1.0 - wx) + rows[:, x1] * wx


	def apply_grid_unwarping(
	    image: np.ndarray,
	    grid: np.ndarray,
	    interpolation: int = cv2.INTER_CUBIC
	) -> np.ndarray:
	    """
	    Apply the coordinate grid to unwarp the image.

	    The coarse grid is upsampled to the image resolution with corner-aligned
	    bilinear interpolation, converted from normalized [-1, 1] coordinates to
	    pixel coordinates, and used as the sampling map for cv2.remap().

	    The pixel conversion scales by (size - 1), which is the corner-aligned
	    convention, so the upsampling must use the same convention. cv2.resize()
	    aligns pixel centres instead: it keeps the map constant over the outer
	    half grid cell, which smears the image borders and slightly compresses
	    the interior.

	    Args:
	        image: Original BGR image (any resolution)
	        grid: Model output grid of shape (1, 2, 45, 31); channel 0 holds the
	            x (width) coordinate and channel 1 the y (height) coordinate
	        interpolation: OpenCV interpolation method used when sampling the image

	    Returns:
	        Unwarped image at original resolution
	    """
	    h_orig, w_orig = image.shape[:2]

	    # Drop the batch dimension; work on one coordinate plane at a time to
	    # keep peak memory at single-channel full-resolution arrays.
	    grid_x = grid[0, 0]
	    grid_y = grid[0, 1]

	    # Upsample, then convert normalized coordinates [-1, 1] to pixel coordinates
	    map_x = (_upsample_align_corners(grid_x, h_orig, w_orig) + 1.0) * (0.5 * (w_orig - 1))
	    map_y = (_upsample_align_corners(grid_y, h_orig, w_orig) + 1.0) * (0.5 * (h_orig - 1))

	    # Apply remapping (cv2.remap requires float32 maps)
	    unwarped = cv2.remap(
	        image,
	        map_x.astype(np.float32, copy=False),
	        map_y.astype(np.float32, copy=False),
	        interpolation=interpolation,
	        borderMode=cv2.BORDER_REPLICATE
	    )

	    return unwarped


	def unwarp_document(
	    image_path: str,
	    output_path: str,
	    model_path: Optional[str] = None
	) -> None:
	    """
	    Main function to unwarp a document image.

	    Loads the image, runs the model to obtain the coordinate grid, applies
	    the grid to the full-resolution image and writes the result to disk,
	    printing progress along the way.

	    Args:
	        image_path: Path to input warped document image
	        output_path: Path to save unwarped result
	        model_path: Optional path to ONNX model file

	    Note:
	        Terminates the process with exit code 1 if the input image cannot
	        be read or the output image cannot be written.
	    """
	    # Load image (cv2.imread signals failure by returning None)
	    print(f"Loading image: {image_path}")
	    image = cv2.imread(image_path)
	    if image is None:
	        print(f"Error: Could not load image from {image_path}")
	        sys.exit(1)

	    h, w = image.shape[:2]
	    print(f"Image size: {w}x{h}")

	    # Load model
	    session = load_model(model_path)

	    # Get input name
	    input_name = session.get_inputs()[0].name
	    print(f"Model input name: {input_name}")

	    # Preprocess
	    print("Preprocessing image...")
	    input_tensor = preprocess_image(image)
	    print(f"Input tensor shape: {input_tensor.shape}")

	    # Run inference; the first output is the coordinate grid
	    print("Running inference...")
	    result = session.run(None, {input_name: input_tensor})[0]
	    print(f"Output grid shape: {result.shape}")
	    print(f"Output grid range: [{result.min():.4f}, {result.max():.4f}]")

	    # Apply unwarping
	    print("Applying grid-based unwarping...")
	    unwarped = apply_grid_unwarping(image, result)

	    # Save result. cv2.imwrite reports an unwritable path through its return
	    # value and raises cv2.error for e.g. an unsupported file extension, so
	    # both are checked rather than reporting success unconditionally.
	    print(f"Saving result to: {output_path}")
	    try:
	        saved = cv2.imwrite(output_path, unwarped)
	        reason = ""
	    except cv2.error as err:
	        saved = False
	        reason = f" ({err})"
	    if not saved:
	        print(f"Error: Could not save image to {output_path}{reason}")
	        sys.exit(1)

	    print("Done!")


	def main():
	    """Parse the command-line arguments and run the unwarping pipeline."""
	    cli = argparse.ArgumentParser(
	        description="Unwarp document images using UVDoc grid-output ONNX model",
	        formatter_class=argparse.RawDescriptionHelpFormatter,
	        epilog="""
	Examples:
	python example.py warped_doc.jpg unwarped_doc.jpg
	python example.py warped_doc.jpg unwarped_doc.jpg --model UVDoc_grid.onnx
	"""
	    )

	    # Positional arguments: source image and destination path
	    cli.add_argument("input", help="Path to input warped document image")
	    cli.add_argument("output", help="Path to save unwarped output image")

	    # Optional local model file; without it the model is downloaded
	    cli.add_argument(
	        "--model", "-m",
	        default=None,
	        help="Path to UVDoc_grid.onnx model file (downloads from HuggingFace if not provided)"
	    )

	    opts = cli.parse_args()

	    # Bail out early when the input path does not exist
	    source = Path(opts.input)
	    if not source.exists():
	        print(f"Error: Input file not found: {opts.input}")
	        sys.exit(1)

	    unwarp_document(opts.input, opts.output, opts.model)


	# Run the command-line interface only when this file is executed as a
	# script, not when it is imported as a module.
	if __name__ == "__main__":
	    main()