File size: 6,279 Bytes
39c8284
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
#!/usr/bin/env python3
"""
UVDoc Grid-Output Document Unwarping Example

This script demonstrates how to use the UVDoc ONNX model with grid output
for high-resolution document unwarping.

The key advantage of this grid-output model over image-output models is that
the coordinate grid can be upscaled to any resolution, preserving document
quality when applied via cv2.remap().

Usage:
    python example.py input_image.jpg output_image.jpg
    python example.py input_image.jpg output_image.jpg --model path/to/UVDoc_grid.onnx

Requirements:
    pip install onnxruntime opencv-python numpy

Optional (for automatic model download):
    pip install huggingface_hub
"""

import argparse
import sys
from pathlib import Path
from typing import Optional

import cv2
import numpy as np

# Model input dimensions (fixed for UVDoc architecture).
# preprocess_image() resizes every image to this size, in pixels, before inference.
MODEL_INPUT_HEIGHT = 720
MODEL_INPUT_WIDTH = 496


def load_model(model_path: Optional[str] = None):
    """
    Load the UVDoc grid-output ONNX model into an ONNX Runtime session.

    Args:
        model_path: Path to the ONNX model file. If None, the model is
                    downloaded from HuggingFace Hub, which requires the
                    optional huggingface_hub package.

    Returns:
        ONNX Runtime InferenceSession created with the CPU execution provider.

    Raises:
        SystemExit: With exit code 1 if model_path is None and
                    huggingface_hub is not installed.
    """
    # Imported inside the function, so importing this module does not
    # require onnxruntime.
    import onnxruntime as ort

    if model_path is None:
        # Guard only the import: an ImportError raised while downloading must
        # not be misreported as "huggingface_hub not installed".
        try:
            from huggingface_hub import hf_hub_download
        except ImportError:
            print("Error: huggingface_hub not installed. Install it or provide --model path.")
            print("  pip install huggingface_hub")
            sys.exit(1)

        print("Downloading model from HuggingFace Hub...")
        model_path = hf_hub_download(
            repo_id="YOUR_USERNAME/uvdoc-grid-onnx",  # Update with actual repo
            filename="UVDoc_grid.onnx",
        )
        print(f"Model downloaded to: {model_path}")

    print(f"Loading model from: {model_path}")
    return ort.InferenceSession(
        model_path,
        providers=["CPUExecutionProvider"],
    )


def preprocess_image(image: np.ndarray) -> np.ndarray:
    """
    Turn a BGR image into the float32 NCHW tensor fed to the model.

    Args:
        image: BGR image from cv2.imread()

    Returns:
        Preprocessed tensor of shape (1, 3, 720, 496)
    """
    # OpenCV takes the target size as (width, height)
    target_size = (MODEL_INPUT_WIDTH, MODEL_INPUT_HEIGHT)

    # BGR -> RGB, then resize to the model input size
    rgb_resized = cv2.resize(cv2.cvtColor(image, cv2.COLOR_BGR2RGB), target_size)

    # Divide by 255 to normalize to [0, 1] as float32
    scaled = rgb_resized.astype(np.float32) / 255.0

    # HWC -> CHW, then prepend the batch axis: (1, 3, H, W)
    return scaled.transpose(2, 0, 1)[np.newaxis, ...]


def apply_grid_unwarping(
    image: np.ndarray,
    grid: np.ndarray,
    interpolation: int = cv2.INTER_CUBIC
) -> np.ndarray:
    """
    Unwarp an image by sampling it through the model's coordinate grid.

    The low-resolution grid is stretched to the image size, so the output
    has the same resolution as the input image.

    Args:
        image: Original BGR image (any resolution)
        grid: Model output grid of shape (1, 2, 45, 31), holding
              normalized coordinates in [-1, 1]
        interpolation: OpenCV interpolation method used when sampling
                       the image

    Returns:
        Unwarped image at original resolution
    """
    height, width = image.shape[:2]

    # (1, 2, H, W) -> (H, W, 2): drop the batch axis, move channels last
    coords = grid[0].transpose(1, 2, 0)

    # Stretch the coarse grid so there is one coordinate pair per output pixel
    dense = cv2.resize(coords, (width, height), interpolation=cv2.INTER_LINEAR)

    # Channel 0 is x (width), channel 1 is y (height);
    # map normalized [-1, 1] coordinates onto pixel coordinates
    x_norm, y_norm = dense[..., 0], dense[..., 1]
    map_x = ((x_norm + 1) / 2 * (width - 1)).astype(np.float32)
    map_y = ((y_norm + 1) / 2 * (height - 1)).astype(np.float32)

    # Sample the source image at the mapped coordinates
    return cv2.remap(
        image,
        map_x,
        map_y,
        interpolation=interpolation,
        borderMode=cv2.BORDER_REPLICATE,
    )


def unwarp_document(
    image_path: str,
    output_path: str,
    model_path: Optional[str] = None
) -> None:
    """
    Unwarp a document image end to end: load, run the model, remap, save.

    Progress and error messages are printed to stdout.

    Args:
        image_path: Path to input warped document image
        output_path: Path to save unwarped result
        model_path: Optional path to ONNX model file; passed to load_model()

    Raises:
        SystemExit: With exit code 1 if the input image cannot be read or
                    the result cannot be written.
    """
    # Load image
    print(f"Loading image: {image_path}")
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising
        print(f"Error: Could not load image from {image_path}")
        sys.exit(1)

    h, w = image.shape[:2]
    print(f"Image size: {w}x{h}")

    # Load model
    session = load_model(model_path)

    # Get input name
    input_name = session.get_inputs()[0].name
    print(f"Model input name: {input_name}")

    # Preprocess
    print("Preprocessing image...")
    input_tensor = preprocess_image(image)
    print(f"Input tensor shape: {input_tensor.shape}")

    # Run inference; the first model output is the coordinate grid
    print("Running inference...")
    result = session.run(None, {input_name: input_tensor})[0]
    print(f"Output grid shape: {result.shape}")
    print(f"Output grid range: [{result.min():.4f}, {result.max():.4f}]")

    # Apply unwarping to the original (full-resolution) image
    print("Applying grid-based unwarping...")
    unwarped = apply_grid_unwarping(image, result)

    # Save result
    print(f"Saving result to: {output_path}")
    # cv2.imwrite returns False when the file could not be written; without
    # this check a failed save would still be reported as "Done!".
    if not cv2.imwrite(output_path, unwarped):
        print(f"Error: Could not save image to {output_path}")
        sys.exit(1)

    print("Done!")


def main():
    """
    Command-line entry point: parse arguments and unwarp one image.

    Exits with status 1 if the input path is not an existing regular file;
    otherwise hands the parsed arguments to unwarp_document().
    """
    parser = argparse.ArgumentParser(
        description="Unwarp document images using UVDoc grid-output ONNX model",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
    python example.py warped_doc.jpg unwarped_doc.jpg
    python example.py warped_doc.jpg unwarped_doc.jpg --model UVDoc_grid.onnx
        """
    )

    parser.add_argument(
        "input",
        help="Path to input warped document image"
    )

    parser.add_argument(
        "output",
        help="Path to save unwarped output image"
    )

    parser.add_argument(
        "--model", "-m",
        default=None,
        help="Path to UVDoc_grid.onnx model file (downloads from HuggingFace if not provided)"
    )

    args = parser.parse_args()

    # Validate input file exists. is_file() rather than exists(): a directory
    # exists but is not an image file, so it is rejected here as well.
    if not Path(args.input).is_file():
        print(f"Error: Input file not found: {args.input}")
        sys.exit(1)

    unwarp_document(args.input, args.output, args.model)


# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()