Upload AnyCalib ONNX exports (FP32 + FP16 + INT8)
Browse files- README.md +103 -0
- config.json +56 -0
- model_fp16.onnx +3 -0
- model_fp32.onnx +3 -0
- model_int8.onnx +3 -0
README.md
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
tags:
|
| 3 |
+
- onnx
|
| 4 |
+
- camera-calibration
|
| 5 |
+
- anycalib
|
| 6 |
+
- computer-vision
|
| 7 |
+
- lens-correction
|
| 8 |
+
- wasm
|
| 9 |
+
- onnxruntime-web
|
| 10 |
+
library_name: onnxruntime
|
| 11 |
+
pipeline_tag: image-to-image
|
| 12 |
+
license: apache-2.0
|
| 13 |
+
---
|
| 14 |
+
|
| 15 |
+
# AnyCalib ONNX — Ray Prediction Head
|
| 16 |
+
|
| 17 |
+
ONNX export of the [AnyCalib](https://github.com/javrtg/AnyCalib) ray prediction neural network,
|
| 18 |
+
ready for deployment with ONNX Runtime (Python, C++, Web/WASM, Mobile).
|
| 19 |
+
|
| 20 |
+
## Variants
|
| 21 |
+
|
| 22 |
+
| Variant | File | Size | Use Case |
|
| 23 |
+
|---------|------|------|----------|
|
| 24 |
+
| FP32 | `model_fp32.onnx` | 1222 MB | Maximum accuracy |
|
| 25 |
+
| FP16 | `model_fp16.onnx` | 611 MB | Good accuracy, half memory |
|
| 26 |
+
| INT8 | `model_int8.onnx` | 311 MB | Fastest, smallest, quantized |
|
| 27 |
+
|
| 28 |
+
## Architecture
|
| 29 |
+
|
| 30 |
+
- **Backbone**: DINOv2 ViT-L/14 (304M params)
|
| 31 |
+
- **Decoder**: LightDPT (15.2M params)
|
| 32 |
+
- **Head**: ConvexTangentDecoder (0.6M params)
|
| 33 |
+
- **Source model**: `anycalib_gen`
|
| 34 |
+
|
| 35 |
+
## Usage — Python
|
| 36 |
+
|
| 37 |
+
```python
|
| 38 |
+
import onnxruntime as ort
|
| 39 |
+
import numpy as np
|
| 40 |
+
|
| 41 |
+
sess = ort.InferenceSession("model_fp16.onnx")
|
| 42 |
+
|
| 43 |
+
# RGB [0,1], size must be divisible by 14
|
| 44 |
+
image = np.random.rand(1, 3, 518, 518).astype(np.float32)
|
| 45 |
+
|
| 46 |
+
rays, tangent_coords = sess.run(None, {"image": image})
|
| 47 |
+
# rays: (1, 3, 518, 518) — unit rays per pixel
|
| 48 |
+
# tangent_coords: (1, 2, 518, 518) — tangent space coords
|
| 49 |
+
```
|
| 50 |
+
|
| 51 |
+
## Usage — ONNX Runtime Web (WASM)
|
| 52 |
+
|
| 53 |
+
```javascript
|
| 54 |
+
import * as ort from 'onnxruntime-web';
|
| 55 |
+
|
| 56 |
+
// Use WASM backend
|
| 57 |
+
ort.env.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/';
|
| 58 |
+
|
| 59 |
+
const session = await ort.InferenceSession.create('./model_int8.onnx', {
|
| 60 |
+
executionProviders: ['wasm'],
|
| 61 |
+
});
|
| 62 |
+
|
| 63 |
+
// Prepare input: (1, 3, 518, 518) RGB float32
|
| 64 |
+
const imageData = new Float32Array(1 * 3 * 518 * 518);
|
| 65 |
+
// ... fill with normalized RGB data ...
|
| 66 |
+
|
| 67 |
+
const inputTensor = new ort.Tensor('float32', imageData, [1, 3, 518, 518]);
|
| 68 |
+
const results = await session.run({ image: inputTensor });
|
| 69 |
+
|
| 70 |
+
const rays = results.rays; // (1, 3, 518, 518)
|
| 71 |
+
const tangentCoords = results.tangent_coords; // (1, 2, 518, 518)
|
| 72 |
+
```
|
| 73 |
+
|
| 74 |
+
## Usage — Transformers.js
|
| 75 |
+
|
| 76 |
+
```javascript
|
| 77 |
+
import * as ort from 'onnxruntime-web';
import { env } from '@huggingface/transformers';
|
| 78 |
+
|
| 79 |
+
// Disable local model resolution so files are fetched from the Hugging Face Hub
|
| 80 |
+
env.allowLocalModels = false;
|
| 81 |
+
|
| 82 |
+
// Load ONNX model directly
|
| 83 |
+
const session = await ort.InferenceSession.create(
|
| 84 |
+
'https://huggingface.co/SebRincon/anycalib-onnx/resolve/main/model_int8.onnx'
|
| 85 |
+
);
|
| 86 |
+
```
|
| 87 |
+
|
| 88 |
+
## Input/Output Spec
|
| 89 |
+
|
| 90 |
+
- **Input**: `image` — `(B, 3, H, W)` RGB float32 in `[0, 1]`, H and W divisible by 14
|
| 91 |
+
- **Output**: `rays` — `(B, 3, H, W)` unit rays on S^2 manifold
|
| 92 |
+
- **Output**: `tangent_coords` — `(B, 2, H, W)` tangent space coordinates
|
| 93 |
+
|
| 94 |
+
## Note
|
| 95 |
+
|
| 96 |
+
The **Calibrator** (RANSAC + Gauss-Newton camera fitting) is NOT included in the ONNX model.
|
| 97 |
+
It must run as a lightweight CPU post-processing step. See the
|
| 98 |
+
[calibrator implementation](https://github.com/javrtg/AnyCalib) for details.
|
| 99 |
+
|
| 100 |
+
## Related
|
| 101 |
+
|
| 102 |
+
- [AnyCalib Raw](https://huggingface.co/SebRincon/anycalib) — Raw PyTorch weights (safetensors)
|
| 103 |
+
- [AnyCalib Source](https://github.com/javrtg/AnyCalib) — Original repository
|
config.json
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_type": "anycalib_ray_head",
|
| 3 |
+
"source_model": "anycalib_gen",
|
| 4 |
+
"description": "AnyCalib ray prediction head (backbone + decoder + head). Calibrator (RANSAC + Gauss-Newton) must run in post-processing.",
|
| 5 |
+
"input": {
|
| 6 |
+
"name": "image",
|
| 7 |
+
"shape": [
|
| 8 |
+
"batch",
|
| 9 |
+
3,
|
| 10 |
+
518,
|
| 11 |
+
518
|
| 12 |
+
],
|
| 13 |
+
"dtype": "float32",
|
| 14 |
+
"range": [
|
| 15 |
+
0.0,
|
| 16 |
+
1.0
|
| 17 |
+
],
|
| 18 |
+
"color": "RGB"
|
| 19 |
+
},
|
| 20 |
+
"outputs": [
|
| 21 |
+
{
|
| 22 |
+
"name": "rays",
|
| 23 |
+
"shape": [
|
| 24 |
+
"batch",
|
| 25 |
+
3,
|
| 26 |
+
518,
|
| 27 |
+
518
|
| 28 |
+
],
|
| 29 |
+
"dtype": "float32"
|
| 30 |
+
},
|
| 31 |
+
{
|
| 32 |
+
"name": "tangent_coords",
|
| 33 |
+
"shape": [
|
| 34 |
+
"batch",
|
| 35 |
+
2,
|
| 36 |
+
518,
|
| 37 |
+
518
|
| 38 |
+
],
|
| 39 |
+
"dtype": "float32"
|
| 40 |
+
}
|
| 41 |
+
],
|
| 42 |
+
"architecture": {
|
| 43 |
+
"backbone": "DINOv2 ViT-L/14 (304M params)",
|
| 44 |
+
"decoder": "LightDPT (15.2M params)",
|
| 45 |
+
"head": "ConvexTangentDecoder (0.6M params)",
|
| 46 |
+
"total_params": "~320M"
|
| 47 |
+
},
|
| 48 |
+
"variants": {
|
| 49 |
+
"fp32": "1222.0 MB",
|
| 50 |
+
"fp16": "611.3 MB",
|
| 51 |
+
"int8": "311.1 MB"
|
| 52 |
+
},
|
| 53 |
+
"opset_version": 17,
|
| 54 |
+
"edge_divisible_by": 14,
|
| 55 |
+
"recommended_input_size": 518
|
| 56 |
+
}
|
model_fp16.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:69aa3a57d91f1726d9daf5b0b2a93a1d28560ffdd68d02b12a3181e1a80f58dc
|
| 3 |
+
size 640943850
|
model_fp32.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:21e64466e38cb0092bc64adee6fc06aebb9529fb22a069ca244c883cd0f751a7
|
| 3 |
+
size 1281329264
|
model_int8.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:06be6e93ca5647c8140a67db87ce42bae335f2913187a9acfb9b83c1bdfe4b4b
|
| 3 |
+
size 326196551
|