""" AnyCalib — Full-Resolution Camera Calibration & Lens Correction Gradio Space running the full AnyCalib pipeline: 1. DINOv2 ViT-L/14 backbone → LightDPT decoder → ConvexTangentDecoder head 2. RANSAC + Gauss-Newton calibrator → camera intrinsics [f, cx, cy, k1, ...] 3. Full-resolution undistortion via grid_sample No resolution limits. No quantization. Full FP32 inference. Runs on GPU if available (ZeroGPU / dedicated), falls back to CPU. """ from __future__ import annotations import json import time import gradio as gr import numpy as np import torch # ── GPU decorator (works on ZeroGPU Spaces, no-op elsewhere) ── try: import spaces gpu_decorator = spaces.GPU(duration=120) except (ImportError, Exception): # Not on a ZeroGPU Space — use identity decorator def gpu_decorator(fn): return fn # ── Load model at startup ── from anycalib.model.anycalib_pretrained import AnyCalib from anycalib.cameras.factory import CameraFactory print("[anycalib] Loading model...") t0 = time.time() MODEL = AnyCalib(model_id="anycalib_gen") MODEL.eval() TOTAL_PARAMS = sum(p.numel() for p in MODEL.parameters()) print(f"[anycalib] Model loaded in {time.time() - t0:.1f}s ({TOTAL_PARAMS:,} params)") def _build_undistort_grid(camera, params, h, w, scale=1.0, target_proj="perspective"): """Build undistortion sampling grid (mirrors AnyCalibRunner._undistort_grid).""" params_b = params[None, ...] if params.ndim == 1 else params num_f = int(camera.NUM_F) f = params_b[..., None, :num_f] c = params_b[..., None, num_f:num_f + 2] im_coords = camera.pixel_grid_coords(h, w, params_b, 0.0).reshape(-1, 2) im_n = (im_coords - c) / f r = torch.linalg.norm(im_n, dim=-1) / scale theta = camera.ideal_unprojection(r, target_proj) phi = torch.atan2(im_n[..., 1], im_n[..., 0]) R = torch.sin(theta) rays = torch.stack((R * torch.cos(phi), R * torch.sin(phi), torch.cos(theta)), dim=-1) params_proj = params_b if num_f == 2: params_proj = params_b.clone() params_proj[..., :2] = f.amax(dim=-1, keepdim=True) map_xy, valid = camera.project(params_proj, rays) if valid is not None: valid = valid.reshape(1, h, w)[0] grid = 2.0 * map_xy.reshape(1, h, w, 2) / map_xy.new_tensor((w, h)) - 1.0 return grid, valid @gpu_decorator @torch.no_grad() def run_calibration( input_image: np.ndarray, cam_id: str, scale: float, target_proj: str, padding_mode: str, interp_mode: str, k1_threshold: float, ): """Full pipeline: predict -> fit -> undistort at original resolution.""" if input_image is None: raise gr.Error("Please upload an image.") device = torch.device("cuda" if torch.cuda.is_available() else "cpu") MODEL.to(device) h, w = input_image.shape[:2] t_total = time.time() # Preprocess x = input_image.astype("float32") / 255.0 x = np.transpose(x, (2, 0, 1)) x_t = torch.from_numpy(x).to(device) # Neural network inference t0 = time.time() out = MODEL.predict(x_t, cam_id=cam_id) intrinsics = out["intrinsics"] pred_size = out.get("pred_size") t_infer = time.time() - t0 # Parse intrinsics camera = CameraFactory.create_from_id(cam_id) num_f = int(camera.NUM_F) intr_list = intrinsics.detach().cpu().numpy().astype(np.float64).tolist() focal = intr_list[:num_f] cx_val, cy_val = intr_list[num_f], intr_list[num_f + 1] k1_val = intr_list[num_f + 2] if len(intr_list) > num_f + 2 else 0.0 f_px = focal[0] fov_h = float(2 * np.degrees(np.arctan(w / (2 * f_px)))) if f_px > 0 else 0 fov_v = float(2 * np.degrees(np.arctan(h / (2 * f_px)))) if f_px > 0 else 0 if k1_val < -0.001: dist_type = "Barrel (k1 < 0)" elif k1_val > 0.001: dist_type = "Pincushion (k1 > 0)" else: dist_type = "Negligible" skip_undistort = k1_threshold > 0 and abs(k1_val) < k1_threshold if skip_undistort: corrected = input_image.copy() valid_frac = 1.0 t_undistort = 0.0 else: t0 = time.time() grid, valid = _build_undistort_grid( camera, intrinsics, h, w, scale=scale, target_proj=target_proj, ) y_t = torch.nn.functional.grid_sample( x_t[None, ...], grid, mode=interp_mode, padding_mode=padding_mode, align_corners=False, ) t_undistort = time.time() - t0 valid_frac = float(valid.float().mean().item()) if valid is not None else 1.0 y = y_t[0].clamp(0, 1).detach().cpu().numpy() y = np.transpose(y, (1, 2, 0)) corrected = (y * 255.0 + 0.5).astype("uint8") t_total_elapsed = time.time() - t_total hw_label = "GPU" if device.type == "cuda" else "CPU" params_md = f""" ### Camera Intrinsics | Parameter | Value | |-----------|-------| | **Focal length** | `{f_px:.2f}` px | | **Principal point** | `({cx_val:.2f}, {cy_val:.2f})` px | | **Distortion k1** | `{k1_val:.6f}` | | **Distortion type** | {dist_type} | | **FOV (horizontal)** | `{fov_h:.1f}` deg | | **FOV (vertical)** | `{fov_v:.1f}` deg | | **Valid pixel fraction** | `{valid_frac:.3f}` | | **k1 gated (skipped)** | `{skip_undistort}` | ### Image Info | Property | Value | |----------|-------| | **Input resolution** | `{w} x {h}` ({w*h:,} px) | | **Model working size** | `{pred_size}` | | **Camera model** | `{cam_id}` | | **Scale** | `{scale}` | | **Target projection** | `{target_proj}` | ### Timing ({hw_label}) | Stage | Time | |-------|------| | Neural net inference | `{t_infer*1000:.0f}` ms | | Undistortion (grid_sample) | `{t_undistort*1000:.0f}` ms | | **Total** | **`{t_total_elapsed*1000:.0f}` ms** | | Hardware | `{device}` ({hw_label}) | """ raw_json = json.dumps({ "intrinsics": { "focal_length_px": focal, "principal_point": [cx_val, cy_val], "k1": k1_val, }, "fov": {"horizontal_deg": fov_h, "vertical_deg": fov_v}, "distortion": {"type": dist_type, "k1_gated": skip_undistort}, "image": { "input_resolution": [w, h], "total_pixels": w * h, "model_working_size": pred_size, }, "camera": { "model": cam_id, "scale": scale, "target_projection": target_proj, "padding_mode": padding_mode, "interpolation": interp_mode, }, "quality": { "valid_pixel_fraction": valid_frac, }, "timing_ms": { "neural_net": round(t_infer * 1000, 1), "undistortion": round(t_undistort * 1000, 1), "total": round(t_total_elapsed * 1000, 1), }, "device": str(device), "all_intrinsics_raw": intr_list, }, indent=2) return corrected, params_md, raw_json # ── Gradio UI ── with gr.Blocks() as demo: gr.Markdown(""" # AnyCalib — Full-Resolution Camera Calibration Single-image lens calibration & distortion correction powered by [AnyCalib](https://github.com/javrtg/AnyCalib) (DINOv2 ViT-L/14 + LightDPT + ConvexTangentDecoder, ~320M params). Full FP32 inference, no quantization, no resolution limits. Automatically uses GPU when available. Upload any image and get the **corrected (undistorted) image** at original resolution, plus camera intrinsics, FOV, distortion parameters, and timing. """) with gr.Row(): with gr.Column(scale=1): input_image = gr.Image( label="Input Image", type="numpy", sources=["upload", "clipboard"], ) with gr.Accordion("Advanced Settings", open=False): cam_id = gr.Dropdown( label="Camera Model", choices=[ "simple_division:1", "division:1", "simple_radial:1", "simple_kb:1", "simple_pinhole", "pinhole", ], value="simple_division:1", ) scale = gr.Slider( label="Focal Length Scale (< 1 = wider FOV, less crop)", minimum=0.5, maximum=1.5, step=0.05, value=1.0, ) target_proj = gr.Dropdown( label="Target Projection", choices=["perspective", "stereographic", "equidistant", "equisolid", "orthographic"], value="perspective", ) padding_mode = gr.Dropdown( label="Padding Mode", choices=["border", "zeros", "reflection"], value="border", ) interp_mode = gr.Dropdown( label="Interpolation", choices=["bilinear", "bicubic", "nearest"], value="bilinear", ) k1_threshold = gr.Slider( label="k1 Threshold (skip undistortion if |k1| below this)", minimum=0.0, maximum=0.1, step=0.005, value=0.0, ) run_btn = gr.Button("Run Calibration", variant="primary", size="lg") with gr.Column(scale=1): output_image = gr.Image(label="Corrected (Undistorted) Image", type="numpy") with gr.Row(): with gr.Column(): params_output = gr.Markdown(label="Camera Parameters") with gr.Column(): json_output = gr.Code(label="Raw JSON Output", language="json") gr.Markdown(""" --- ### How it works 1. **Upload** any image (phone photo, action cam, drone, dashcam, etc.) 2. The model predicts per-pixel **ray directions** using a DINOv2 ViT-L/14 backbone 3. **RANSAC + Gauss-Newton** calibrator fits camera intrinsics `[f, cx, cy, k1]` from the rays 4. Image is **undistorted at full resolution** via differentiable grid_sample 5. All parameters and raw JSON output are displayed ### Links - Raw weights: [SebRincon/anycalib](https://huggingface.co/SebRincon/anycalib) (safetensors) - ONNX models: [SebRincon/anycalib-onnx](https://huggingface.co/SebRincon/anycalib-onnx) (FP32/FP16/INT8) - WASM demo: [SebRincon/anycalib-wasm](https://huggingface.co/spaces/SebRincon/anycalib-wasm) (browser-only) - Source: [github.com/javrtg/AnyCalib](https://github.com/javrtg/AnyCalib) """) run_btn.click( fn=run_calibration, inputs=[input_image, cam_id, scale, target_proj, padding_mode, interp_mode, k1_threshold], outputs=[output_image, params_output, json_output], ) input_image.change( fn=run_calibration, inputs=[input_image, cam_id, scale, target_proj, padding_mode, interp_mode, k1_threshold], outputs=[output_image, params_output, json_output], ) if __name__ == "__main__": demo.launch()