File size: 3,920 Bytes
e7682cd
 
b5813c5
 
e7682cd
 
 
 
 
 
 
b5813c5
 
 
 
 
e7682cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
from __future__ import annotations

import os
import sys
from typing import Dict, Optional, Tuple

import gradio as gr
import numpy as np
import torch
from PIL import Image

# The depth_anything_3 package is imported from the ``src`` directory that sits
# next to this file, so that directory is put at the front of sys.path (once).
src_path = os.path.join(os.path.dirname(__file__), "src")
if sys.path.count(src_path) == 0:
    sys.path.insert(0, src_path)

from depth_anything_3.api import DepthAnything3
from depth_anything_3.utils.visualize import visualize_depth

# Device every loaded model is moved to: CUDA when available, otherwise CPU.
DEVICE = (
    torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
)

# Human-readable model label (shown in the UI) -> repo id passed to
# ``DepthAnything3.from_pretrained``. Insertion order is the dropdown order.
MODEL_SOURCES: Dict[str, str] = {
    "Depth Anything v3 Nested Giant Large": "depth-anything/DA3NESTED-GIANT-LARGE",
    "Depth Anything v3 Giant": "depth-anything/DA3-GIANT",
    "Depth Anything v3 Large": "depth-anything/DA3-LARGE",
    "Depth Anything v3 Base": "depth-anything/DA3-BASE",
    "Depth Anything v3 Small": "depth-anything/DA3-SMALL",
    "Depth Anything v3 Metric Large": "depth-anything/DA3METRIC-LARGE",
    "Depth Anything v3 Mono Large": "depth-anything/DA3MONO-LARGE",
}

# Already-loaded models keyed by repo id, filled lazily by ``_load_model``.
_MODEL_CACHE: Dict[str, DepthAnything3] = {}


def _load_model(model_label: str) -> DepthAnything3:
    """Return the model for *model_label*, loading it on first use.

    The label is translated to a repo id through ``MODEL_SOURCES``. A model
    that is not yet in ``_MODEL_CACHE`` is created with
    ``DepthAnything3.from_pretrained``, moved to ``DEVICE``, switched to eval
    mode and stored in the cache under its repo id.

    Args:
        model_label: A key of ``MODEL_SOURCES``.

    Returns:
        The cached ``DepthAnything3`` instance for that label.

    Raises:
        KeyError: If *model_label* is not a key of ``MODEL_SOURCES``.
    """
    repo_id = MODEL_SOURCES[model_label]

    # Fast path: this repo id was loaded by an earlier call.
    try:
        return _MODEL_CACHE[repo_id]
    except KeyError:
        pass

    loaded = DepthAnything3.from_pretrained(repo_id).to(device=DEVICE)
    loaded.eval()
    _MODEL_CACHE[repo_id] = loaded
    return loaded


def _prep_image(image: np.ndarray) -> np.ndarray:
    """Coerce an input array into a 3-channel ``uint8`` image where possible.

    Channel handling:
      * ``(H, W)``      -> replicated into three identical channels.
      * ``(H, W, 1)``   -> singleton channel replicated into three channels.
      * ``(H, W, 4)``   -> the fourth (alpha) channel is dropped.
      * anything else   -> layout left untouched.

    Args:
        image: Image array to normalise.

    Returns:
        The array with the channel handling above applied and, when the dtype
        was not already ``uint8``, with values clipped to ``[0, 255]`` and cast
        to ``uint8``. A ``uint8`` array that needs no channel change is
        returned as-is (same object).
    """
    if image.ndim == 2:
        image = np.stack([image] * 3, axis=-1)
    elif image.ndim == 3:
        channels = image.shape[-1]
        if channels == 1:
            # Grayscale with an explicit channel axis: expand like the 2-D case.
            image = np.repeat(image, 3, axis=-1)
        elif channels == 4:
            # RGBA: keep only the colour channels.
            image = image[..., :3]
    if image.dtype != np.uint8:
        image = np.clip(image, 0, 255).astype(np.uint8)
    return image


def run_inference(
    model_label: str,
    image: Optional[np.ndarray],
) -> tuple[Tuple[np.ndarray, np.ndarray], str]:
    """Run depth inference on one image and build the UI outputs.

    Args:
        model_label: Key of ``MODEL_SOURCES`` selecting the model to use.
        image: Uploaded image array, or ``None`` when nothing was uploaded.

    Returns:
        A pair ``((rgb, depth_vis), summary)``: the two images shown in the
        comparison slider and a Markdown string listing the model repo id,
        the device, the depth shape and, when present, the extrinsics and
        intrinsics shapes.

    Raises:
        gr.Error: If *image* is ``None``.
    """
    if image is None:
        raise gr.Error("Upload an image before running inference.")

    rgb = _prep_image(image)
    model = _load_model(model_label)

    # NOTE(review): process_res=None with process_res_method="keep" presumably
    # keeps the input resolution - confirm against DepthAnything3.inference.
    prediction = model.inference(
        image=[Image.fromarray(rgb)],
        process_res=None,
        process_res_method="keep",
    )

    # Only one image is submitted, so index 0 is its depth map.
    depth_vis = visualize_depth(prediction.depth[0], cmap="Spectral")

    # Prefer the image as the model processed it; fall back to the prepared input.
    if prediction.processed_images is not None:
        left_image = prediction.processed_images[0]
    else:
        left_image = rgb

    summary = [
        f"**Model:** `{MODEL_SOURCES[model_label]}`",
        f"**Device:** `{DEVICE}`",
        f"**Depth shape:** `{tuple(prediction.depth.shape)}`",
    ]
    # Camera parameters are optional outputs; report only those that exist.
    optional_fields = (
        ("Extrinsics", prediction.extrinsics),
        ("Intrinsics", prediction.intrinsics),
    )
    for title, value in optional_fields:
        if value is not None:
            summary.append(f"**{title} shape:** `{value.shape}`")

    return (left_image, depth_vis), "\n".join(summary)


def build_app() -> gr.Blocks:
    """Assemble the Gradio UI for the demo without launching it.

    Components are created in this order: an intro Markdown block, a model
    dropdown (choices are the ``MODEL_SOURCES`` labels), an image input, a run
    button, an RGB-vs-depth image slider and a Markdown info panel. Clicking
    the button calls ``run_inference`` with the dropdown and image values and
    routes its two results to the slider and the info panel.

    Returns:
        The constructed ``gr.Blocks`` instance.
    """
    model_names = list(MODEL_SOURCES)

    with gr.Blocks(title="Depth Anything v3 - Any Size Demo") as app:
        gr.Markdown(
            """
            ## Depth Anything v3 (Any-Size Demo)
            Upload an image, pick a pretrained model, and compare RGB against the inferred depth.
            """
        )

        with gr.Row():
            model_picker = gr.Dropdown(
                label="Model",
                choices=model_names,
                value="Depth Anything v3 Large",
            )

        uploaded_image = gr.Image(
            label="Input Image",
            type="numpy",
            image_mode="RGB",
        )
        run_trigger = gr.Button("Run Inference", variant="primary")

        with gr.Row():
            rgb_depth_slider = gr.ImageSlider(label="RGB vs Depth")

        summary_panel = gr.Markdown()

        # Wire the button: (model, image) in -> (slider images, summary) out.
        run_trigger.click(
            fn=run_inference,
            inputs=[model_picker, uploaded_image],
            outputs=[rgb_depth_slider, summary_panel],
        )

    return app


def main() -> None:
    """Build the demo UI, enable a request queue of at most 8, and launch it."""
    demo = build_app()
    queued_demo = demo.queue(max_size=8)
    queued_demo.launch()


# Start the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()