# Hugging Face Space app. (The "Spaces: Paused" lines here were page-capture
# residue from the hosting page, not part of the program.)
| """ | |
| AnyCalib β Full-Resolution Camera Calibration & Lens Correction | |
| Gradio Space running the full AnyCalib pipeline: | |
| 1. DINOv2 ViT-L/14 backbone β LightDPT decoder β ConvexTangentDecoder head | |
| 2. RANSAC + Gauss-Newton calibrator β camera intrinsics [f, cx, cy, k1, ...] | |
| 3. Full-resolution undistortion via grid_sample | |
| No resolution limits. No quantization. Full FP32 inference. | |
| Runs on GPU if available (ZeroGPU / dedicated), falls back to CPU. | |
| """ | |
| from __future__ import annotations | |
| import json | |
| import time | |
| import gradio as gr | |
| import numpy as np | |
| import torch | |
| # ββ GPU decorator (works on ZeroGPU Spaces, no-op elsewhere) ββ | |
| try: | |
| import spaces | |
| gpu_decorator = spaces.GPU(duration=120) | |
| except (ImportError, Exception): | |
| # Not on a ZeroGPU Space β use identity decorator | |
| def gpu_decorator(fn): | |
| return fn | |
| # ββ Load model at startup ββ | |
| from anycalib.model.anycalib_pretrained import AnyCalib | |
| from anycalib.cameras.factory import CameraFactory | |
| print("[anycalib] Loading model...") | |
| t0 = time.time() | |
| MODEL = AnyCalib(model_id="anycalib_gen") | |
| MODEL.eval() | |
| TOTAL_PARAMS = sum(p.numel() for p in MODEL.parameters()) | |
| print(f"[anycalib] Model loaded in {time.time() - t0:.1f}s ({TOTAL_PARAMS:,} params)") | |
| def _build_undistort_grid(camera, params, h, w, scale=1.0, target_proj="perspective"): | |
| """Build undistortion sampling grid (mirrors AnyCalibRunner._undistort_grid).""" | |
| params_b = params[None, ...] if params.ndim == 1 else params | |
| num_f = int(camera.NUM_F) | |
| f = params_b[..., None, :num_f] | |
| c = params_b[..., None, num_f:num_f + 2] | |
| im_coords = camera.pixel_grid_coords(h, w, params_b, 0.0).reshape(-1, 2) | |
| im_n = (im_coords - c) / f | |
| r = torch.linalg.norm(im_n, dim=-1) / scale | |
| theta = camera.ideal_unprojection(r, target_proj) | |
| phi = torch.atan2(im_n[..., 1], im_n[..., 0]) | |
| R = torch.sin(theta) | |
| rays = torch.stack((R * torch.cos(phi), R * torch.sin(phi), torch.cos(theta)), dim=-1) | |
| params_proj = params_b | |
| if num_f == 2: | |
| params_proj = params_b.clone() | |
| params_proj[..., :2] = f.amax(dim=-1, keepdim=True) | |
| map_xy, valid = camera.project(params_proj, rays) | |
| if valid is not None: | |
| valid = valid.reshape(1, h, w)[0] | |
| grid = 2.0 * map_xy.reshape(1, h, w, 2) / map_xy.new_tensor((w, h)) - 1.0 | |
| return grid, valid | |
# NOTE(review): gpu_decorator (defined above) is never applied here — on a
# ZeroGPU Space this handler will run without requesting a GPU allocation.
# TODO confirm whether @gpu_decorator was intended.
def run_calibration(
    input_image: np.ndarray,
    cam_id: str,
    scale: float,
    target_proj: str,
    padding_mode: str,
    interp_mode: str,
    k1_threshold: float,
) -> tuple[np.ndarray, str, str]:
    """Full pipeline: predict -> fit -> undistort at original resolution.

    Args:
        input_image: HxWxC uint8 image from the Gradio widget. Assumes 3
            channels — a grayscale upload would break the transpose below;
            TODO confirm the Image component always delivers RGB.
        cam_id: AnyCalib camera-model identifier (e.g. "simple_division:1").
        scale: focal scale forwarded to the undistortion grid (<1 = wider FOV).
        target_proj: ideal projection for the corrected output.
        padding_mode: grid_sample padding for samples outside the source.
        interp_mode: grid_sample interpolation mode.
        k1_threshold: if > 0 and |k1| is below it, undistortion is skipped
            and the input is returned unchanged.

    Returns:
        (corrected_image_uint8, markdown_report, raw_json_string).

    Raises:
        gr.Error: when no image was provided.
    """
    if input_image is None:
        raise gr.Error("Please upload an image.")
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    MODEL.to(device)
    h, w = input_image.shape[:2]
    t_total = time.time()
    # Preprocess: uint8 HWC -> float32 CHW in [0, 1] on the inference device.
    x = input_image.astype("float32") / 255.0
    x = np.transpose(x, (2, 0, 1))
    x_t = torch.from_numpy(x).to(device)
    # Neural network inference (timed separately from undistortion).
    t0 = time.time()
    out = MODEL.predict(x_t, cam_id=cam_id)
    intrinsics = out["intrinsics"]
    pred_size = out.get("pred_size")
    t_infer = time.time() - t0
    # Parse intrinsics: layout is [f (NUM_F values), cx, cy, k1, ...].
    # Assumes `intrinsics` is a 1-D tensor — TODO confirm predict() output shape.
    camera = CameraFactory.create_from_id(cam_id)
    num_f = int(camera.NUM_F)
    intr_list = intrinsics.detach().cpu().numpy().astype(np.float64).tolist()
    focal = intr_list[:num_f]
    cx_val, cy_val = intr_list[num_f], intr_list[num_f + 1]
    # Cameras without a distortion term (e.g. pinhole) report k1 = 0.
    k1_val = intr_list[num_f + 2] if len(intr_list) > num_f + 2 else 0.0
    f_px = focal[0]
    # Diagonal-free FOV estimate from the first focal value (pinhole formula).
    fov_h = float(2 * np.degrees(np.arctan(w / (2 * f_px)))) if f_px > 0 else 0
    fov_v = float(2 * np.degrees(np.arctan(h / (2 * f_px)))) if f_px > 0 else 0
    if k1_val < -0.001:
        dist_type = "Barrel (k1 < 0)"
    elif k1_val > 0.001:
        dist_type = "Pincushion (k1 > 0)"
    else:
        dist_type = "Negligible"
    # Optional gate: skip the (expensive) resample when distortion is tiny.
    skip_undistort = k1_threshold > 0 and abs(k1_val) < k1_threshold
    if skip_undistort:
        corrected = input_image.copy()
        valid_frac = 1.0
        t_undistort = 0.0
    else:
        t0 = time.time()
        grid, valid = _build_undistort_grid(
            camera, intrinsics, h, w,
            scale=scale, target_proj=target_proj,
        )
        # Resample the full-resolution image through the undistortion grid.
        y_t = torch.nn.functional.grid_sample(
            x_t[None, ...], grid,
            mode=interp_mode,
            padding_mode=padding_mode,
            align_corners=False,
        )
        t_undistort = time.time() - t0
        valid_frac = float(valid.float().mean().item()) if valid is not None else 1.0
        y = y_t[0].clamp(0, 1).detach().cpu().numpy()
        y = np.transpose(y, (1, 2, 0))
        # +0.5 rounds to nearest instead of truncating on the uint8 cast.
        corrected = (y * 255.0 + 0.5).astype("uint8")
    t_total_elapsed = time.time() - t_total
    hw_label = "GPU" if device.type == "cuda" else "CPU"
    # Human-readable markdown report rendered in the UI.
    params_md = f"""
### Camera Intrinsics
| Parameter | Value |
|-----------|-------|
| **Focal length** | `{f_px:.2f}` px |
| **Principal point** | `({cx_val:.2f}, {cy_val:.2f})` px |
| **Distortion k1** | `{k1_val:.6f}` |
| **Distortion type** | {dist_type} |
| **FOV (horizontal)** | `{fov_h:.1f}` deg |
| **FOV (vertical)** | `{fov_v:.1f}` deg |
| **Valid pixel fraction** | `{valid_frac:.3f}` |
| **k1 gated (skipped)** | `{skip_undistort}` |
### Image Info
| Property | Value |
|----------|-------|
| **Input resolution** | `{w} x {h}` ({w*h:,} px) |
| **Model working size** | `{pred_size}` |
| **Camera model** | `{cam_id}` |
| **Scale** | `{scale}` |
| **Target projection** | `{target_proj}` |
### Timing ({hw_label})
| Stage | Time |
|-------|------|
| Neural net inference | `{t_infer*1000:.0f}` ms |
| Undistortion (grid_sample) | `{t_undistort*1000:.0f}` ms |
| **Total** | **`{t_total_elapsed*1000:.0f}` ms** |
| Hardware | `{device}` ({hw_label}) |
"""
    # Machine-readable mirror of the report for copy/paste or download.
    raw_json = json.dumps({
        "intrinsics": {
            "focal_length_px": focal,
            "principal_point": [cx_val, cy_val],
            "k1": k1_val,
        },
        "fov": {"horizontal_deg": fov_h, "vertical_deg": fov_v},
        "distortion": {"type": dist_type, "k1_gated": skip_undistort},
        "image": {
            "input_resolution": [w, h],
            "total_pixels": w * h,
            "model_working_size": pred_size,
        },
        "camera": {
            "model": cam_id,
            "scale": scale,
            "target_projection": target_proj,
            "padding_mode": padding_mode,
            "interpolation": interp_mode,
        },
        "quality": {
            "valid_pixel_fraction": valid_frac,
        },
        "timing_ms": {
            "neural_net": round(t_infer * 1000, 1),
            "undistortion": round(t_undistort * 1000, 1),
            "total": round(t_total_elapsed * 1000, 1),
        },
        "device": str(device),
        "all_intrinsics_raw": intr_list,
    }, indent=2)
    return corrected, params_md, raw_json
# ── Gradio UI ──
with gr.Blocks() as demo:
    gr.Markdown("""
# AnyCalib — Full-Resolution Camera Calibration
Single-image lens calibration & distortion correction powered by
[AnyCalib](https://github.com/javrtg/AnyCalib) (DINOv2 ViT-L/14 + LightDPT + ConvexTangentDecoder, ~320M params).
Full FP32 inference, no quantization, no resolution limits. Automatically uses GPU when available.
Upload any image and get the **corrected (undistorted) image** at original resolution,
plus camera intrinsics, FOV, distortion parameters, and timing.
""")
    with gr.Row():
        with gr.Column(scale=1):
            input_image = gr.Image(
                label="Input Image",
                type="numpy",
                sources=["upload", "clipboard"],
            )
            with gr.Accordion("Advanced Settings", open=False):
                cam_id = gr.Dropdown(
                    label="Camera Model",
                    choices=[
                        "simple_division:1",
                        "division:1",
                        "simple_radial:1",
                        "simple_kb:1",
                        "simple_pinhole",
                        "pinhole",
                    ],
                    value="simple_division:1",
                )
                scale = gr.Slider(
                    label="Focal Length Scale (< 1 = wider FOV, less crop)",
                    minimum=0.5, maximum=1.5, step=0.05, value=1.0,
                )
                target_proj = gr.Dropdown(
                    label="Target Projection",
                    choices=["perspective", "stereographic", "equidistant", "equisolid", "orthographic"],
                    value="perspective",
                )
                padding_mode = gr.Dropdown(
                    label="Padding Mode",
                    choices=["border", "zeros", "reflection"],
                    value="border",
                )
                interp_mode = gr.Dropdown(
                    label="Interpolation",
                    choices=["bilinear", "bicubic", "nearest"],
                    value="bilinear",
                )
                k1_threshold = gr.Slider(
                    label="k1 Threshold (skip undistortion if |k1| below this)",
                    minimum=0.0, maximum=0.1, step=0.005, value=0.0,
                )
            run_btn = gr.Button("Run Calibration", variant="primary", size="lg")
        with gr.Column(scale=1):
            output_image = gr.Image(label="Corrected (Undistorted) Image", type="numpy")
    with gr.Row():
        with gr.Column():
            params_output = gr.Markdown(label="Camera Parameters")
        with gr.Column():
            json_output = gr.Code(label="Raw JSON Output", language="json")
    gr.Markdown("""
---
### How it works
1. **Upload** any image (phone photo, action cam, drone, dashcam, etc.)
2. The model predicts per-pixel **ray directions** using a DINOv2 ViT-L/14 backbone
3. **RANSAC + Gauss-Newton** calibrator fits camera intrinsics `[f, cx, cy, k1]` from the rays
4. Image is **undistorted at full resolution** via differentiable grid_sample
5. All parameters and raw JSON output are displayed
### Links
- Raw weights: [SebRincon/anycalib](https://huggingface.co/SebRincon/anycalib) (safetensors)
- ONNX models: [SebRincon/anycalib-onnx](https://huggingface.co/SebRincon/anycalib-onnx) (FP32/FP16/INT8)
- WASM demo: [SebRincon/anycalib-wasm](https://huggingface.co/spaces/SebRincon/anycalib-wasm) (browser-only)
- Source: [github.com/javrtg/AnyCalib](https://github.com/javrtg/AnyCalib)
""")
    # Shared wiring for both triggers (button click and image change).
    calib_inputs = [input_image, cam_id, scale, target_proj, padding_mode, interp_mode, k1_threshold]
    calib_outputs = [output_image, params_output, json_output]
    run_btn.click(
        fn=run_calibration,
        inputs=calib_inputs,
        outputs=calib_outputs,
    )
    # Auto-run on new image. NOTE(review): .change also fires when the image is
    # cleared, which surfaces the "Please upload an image." error popup —
    # consider input_image.upload if that is unwanted; TODO confirm intent.
    input_image.change(
        fn=run_calibration,
        inputs=calib_inputs,
        outputs=calib_outputs,
    )

if __name__ == "__main__":
    demo.launch()