metricanything_student_depthmap

Sleeping

App Files Files Community

metricanything_student_depthmap / app.py

yjh001

Update app.py

5f7fb5b verified 4 months ago

raw

history blame contribute delete

6.85 kB

	#!/usr/bin/env python3
	"""Gradio demo for MetricAnything DepthMap."""

	from __future__ import annotations

	import json
	from pathlib import Path
	from typing import Any, Tuple

	import gradio as gr
	import matplotlib
	import numpy as np
	import torch
	from PIL import Image
	from torchvision.transforms import v2

	from depth_model import MetricAnythingDepthMap

	# Optional dependency: remember whether `spaces` imported so later code can
	# decide whether to apply its GPU decorator.
	try:
	    import spaces
	except Exception:
	    SPACES_AVAILABLE = False
	else:
	    SPACES_AVAILABLE = True


	# Directory of bundled example images, resolved next to this file.
	EXAMPLES_DIR = Path(__file__).parent / "examples"
	# Model identifier and weights filename handed to
	# MetricAnythingDepthMap.from_pretrained in load_model().
	MODEL_ID = "yjh001/metricanything_student_depthmap"
	MODEL_FILENAME = "student_depthmap.pt"
	# Upper bound (meters) on depths treated as valid for visualization and stats.
	MAX_DEPTH = 200.0


	def list_examples() -> list[Path]:
	    """Return the example image paths found in EXAMPLES_DIR, sorted.

	    Entries are kept when their suffix is .png, .jpg or .jpeg (compared
	    case-insensitively). A missing examples directory yields an empty list.
	    """
	    if not EXAMPLES_DIR.exists():
	        return []
	    image_suffixes = {".png", ".jpg", ".jpeg"}
	    matches = [
	        entry
	        for entry in EXAMPLES_DIR.iterdir()
	        if entry.suffix.lower() in image_suffixes
	    ]
	    matches.sort()
	    return matches


	def read_intrinsics(json_path: Path) -> float | None:
	    """Read a focal length (pixels) from a JSON sidecar file.

	    The file is expected to contain an object with a "cam_in" entry that is
	    either:
	      * a non-empty list whose first element is the focal length (a nested
	        first row, e.g. of a 3x3 intrinsics matrix, is unwrapped once), or
	      * an object holding one of the keys "fx", "f_x", "focal_length",
	        "focal_length_px" (checked in that order).

	    Args:
	        json_path: Path of the sidecar file; it does not have to exist.

	    Returns:
	        The focal length as a float, or None when the file is missing,
	        unreadable, not valid JSON, or holds no usable numeric value. The
	        sidecar is optional, so every such case is reported as "no
	        intrinsics" instead of raising.
	    """
	    try:
	        data = json.loads(json_path.read_text(encoding="utf-8"))
	    except (OSError, ValueError):
	        # OSError: missing/unreadable file. ValueError: malformed JSON
	        # (json.JSONDecodeError) or undecodable bytes (UnicodeDecodeError).
	        return None
	    if not isinstance(data, dict):
	        return None

	    cam_in = data.get("cam_in")
	    candidate = None
	    if isinstance(cam_in, (list, tuple)) and cam_in:
	        candidate = cam_in[0]
	        # A matrix-style "cam_in" stores fx at [0][0].
	        if isinstance(candidate, (list, tuple)) and candidate:
	            candidate = candidate[0]
	    elif isinstance(cam_in, dict):
	        focal_keys = ("fx", "f_x", "focal_length", "focal_length_px")
	        candidate = next((cam_in[key] for key in focal_keys if key in cam_in), None)

	    if candidate is None:
	        return None
	    try:
	        return float(candidate)
	    except (TypeError, ValueError):
	        # Present but not numeric: treat as "no intrinsics".
	        return None


	def make_transform() -> v2.Compose:
	    """Build the preprocessing pipeline applied to input images.

	    Steps, in order: v2.ToImage, v2.ToDtype to float32 with scale=True,
	    then v2.Normalize with the per-channel mean/std given below.
	    """
	    channel_mean = (0.485, 0.456, 0.406)
	    channel_std = (0.229, 0.224, 0.225)
	    steps = [
	        v2.ToImage(),
	        v2.ToDtype(torch.float32, scale=True),
	        v2.Normalize(mean=channel_mean, std=channel_std),
	    ]
	    return v2.Compose(steps)


	def colorize_depth(depth: np.ndarray, max_depth: float = MAX_DEPTH, cmap: str = "turbo_r") -> np.ndarray:
	    """Inverse-depth visualization in a 0–max_depth meter range.

	    A pixel is valid when its depth is finite, > 0 and <= max_depth. The
	    inverse depth of valid pixels is normalized between its 0.1% and 99%
	    quantiles and passed (as 1 - value) through the matplotlib colormap
	    named by cmap.

	    Args:
	        depth: Depth array; the output has the same shape plus a trailing
	            RGB axis.
	        max_depth: Largest depth still considered valid.
	        cmap: Name looked up in matplotlib.colormaps.

	    Returns:
	        A contiguous uint8 array of shape (*depth.shape, 3). Invalid/far
	        pixels are white; the image is entirely white when nothing is valid.
	    """
	    valid = np.isfinite(depth) & (depth > 0) & (depth <= max_depth)
	    if not np.any(valid):
	        return np.full((*depth.shape, 3), 255, dtype=np.uint8)

	    # The reciprocal is evaluated for every pixel, including invalid zeros;
	    # those entries are replaced by NaN right away, so silence the warnings.
	    with np.errstate(divide="ignore", invalid="ignore"):
	        disp = np.where(valid, 1.0 / depth, np.nan)
	    min_disp, max_disp = np.nanquantile(disp, 0.001), np.nanquantile(disp, 0.99)
	    if max_disp > min_disp:
	        disp = (disp - min_disp) / (max_disp - min_disp)
	    else:
	        # Constant inverse depth: map every valid pixel to the same value.
	        disp = disp * 0.0

	    rgb = matplotlib.colormaps[cmap](1.0 - disp)[..., :3]
	    # `nan` must be passed by keyword: the second positional parameter of
	    # np.nan_to_num is `copy`, not the NaN replacement value.
	    colored = np.nan_to_num(rgb, nan=0.0)
	    colored = (colored.clip(0.0, 1.0) * 255).astype(np.uint8)
	    colored[~valid] = 255
	    return np.ascontiguousarray(colored)


	def prepare_focal(image: Image.Image, image_path: Path | None) -> Tuple[float, str, Any]:
	    """Pick a focal length for an image and build the matching slider update.

	    When image_path is given, a sidecar file with the same name and a
	    ".json" suffix is queried through read_intrinsics(). If no focal length
	    is obtained, the image width in pixels is used instead.

	    Args:
	        image: The input image; only its width is read.
	        image_path: Path the image was loaded from, or None for uploads.

	    Returns:
	        A tuple (focal length in pixels, user-facing info message,
	        gr.update(...) payload that sets the slider value and range).
	    """
	    width = image.width
	    fx = None
	    if image_path is not None:
	        fx = read_intrinsics(image_path.with_suffix(".json"))

	    if fx is not None:
	        info = f"Intrinsics found. Using focal length (pixels): {fx:.2f}."
	    else:
	        fx = float(width)
	        info = f"No intrinsics found. Using image width (W={width}) as focal length (pixels)."

	    maximum = max(2000, int(width * 2))
	    # Widen the range when a sidecar focal length exceeds it, so the value
	    # handed to the slider is never above the slider's own maximum.
	    if np.isfinite(fx) and fx > maximum:
	        maximum = int(np.ceil(fx))

	    slider = gr.update(value=fx, minimum=1, maximum=maximum, step=1)
	    return fx, info, slider


	def select_example(example_paths: list[str], evt: gr.SelectData):
	    """Load the example picked in the gallery.

	    The path at position evt.index in example_paths is opened and converted
	    to RGB, and prepare_focal() supplies the slider update and info text.

	    Returns:
	        (image, slider update, info message, "example"); the final string
	        marks the image as coming from the examples.
	    """
	    chosen = Path(example_paths[evt.index])
	    rgb_image = Image.open(chosen).convert("RGB")
	    _fx, message, slider_update = prepare_focal(rgb_image, chosen)
	    return rgb_image, slider_update, message, "example"


	def on_input_change(image: Image.Image | None, source: str):
	    """React to a change of the input image.

	    Args:
	        image: The current input image, or None when it was cleared.
	        source: Source marker; "example" means the image was just set by
	            select_example(), which already configured slider and info.

	    Returns:
	        (slider update, info update, new source marker). Slider and info are
	        left untouched (empty gr.update()) when the image is None or came
	        from an example; otherwise they are derived from the image via
	        prepare_focal(image, None). The source marker is always reset to "".
	    """
	    if image is None or source == "example":
	        return gr.update(), gr.update(), ""
	    _, info, slider = prepare_focal(image, None)
	    return slider, info, ""


	def load_model() -> MetricAnythingDepthMap:
	    """Load the pretrained depth model and switch it to eval mode.

	    The device string ("cuda" when torch reports CUDA as available, else
	    "cpu") is forwarded to from_pretrained through model_kwargs.
	    """
	    use_cuda = torch.cuda.is_available()
	    target_device = "cuda" if use_cuda else "cpu"
	    depth_model = MetricAnythingDepthMap.from_pretrained(
	        MODEL_ID,
	        model_kwargs={"device": target_device},
	        filename=MODEL_FILENAME,
	    )
	    depth_model.eval()
	    return depth_model


	# Created once at import time and shared by every run_inference() call.
	TRANSFORM = make_transform()
	MODEL = load_model()
	# Device input tensors are moved to; uses the same CUDA-availability check
	# as load_model().
	DEVICE = "cuda" if torch.cuda.is_available() else "cpu"


	# NOTE(review): torch.no_grad() is the outermost decorator, so it wraps the
	# spaces.GPU wrapper rather than the function body directly — confirm that
	# gradient tracking is actually disabled where the body executes.
	@torch.no_grad()
	@(spaces.GPU if SPACES_AVAILABLE else (lambda f: f))
	def run_inference(image: Image.Image | None, focal_px: float):
	    """Estimate depth for an image and return a visualization plus stats.

	    Args:
	        image: Input image, or None when nothing has been provided.
	        focal_px: Focal length in pixels, forwarded to MODEL.infer as f_px.

	    Returns:
	        (visualization, message). The visualization is the RGB array from
	        colorize_depth(), or None when no image was given. The message
	        reports the min/max depth among pixels that are finite, > 0 and
	        <= MAX_DEPTH, or states that no such pixel exists.
	    """
	    if image is None:
	        return None, "Please provide an input image."

	    # TRANSFORM normalizes with 3-channel statistics, so make sure the input
	    # really has three channels (e.g. not RGBA or grayscale).
	    if image.mode != "RGB":
	        image = image.convert("RGB")

	    tensor = TRANSFORM(image).unsqueeze(0).to(DEVICE)
	    prediction = MODEL.infer(tensor, f_px=float(focal_px))
	    depth = prediction["depth"].detach().cpu().numpy().squeeze()

	    vis = colorize_depth(depth, max_depth=MAX_DEPTH)
	    valid = np.isfinite(depth) & (depth > 0) & (depth <= MAX_DEPTH)
	    if np.any(valid):
	        min_d = float(depth[valid].min())
	        max_d = float(depth[valid].max())
	        stats = f"Depth range (0–{MAX_DEPTH:.0f} m): min={min_d:.2f} m, max={max_d:.2f} m"
	    else:
	        stats = f"No valid depth in 0–{MAX_DEPTH:.0f} m range."

	    return vis, stats


	def build_demo() -> gr.Blocks:
	    """Assemble the Gradio interface and wire up its event handlers.

	    Layout: a left column with the example gallery, input image, focal
	    length slider, info text and Run button; a right column with the depth
	    visualization and its stats. Two State objects carry the example paths
	    and the marker telling whether the current image came from an example.
	    """
	    examples = list_examples()
	    path_strings = [str(p) for p in examples]
	    gallery_items = [(str(p), p.name) for p in examples]

	    with gr.Blocks(theme=gr.themes.Soft()) as demo:
	        gr.Markdown("# MetricAnything DepthMap")
	        gr.Markdown("Select an example or upload your own image to estimate metric depth.")

	        with gr.Row():
	            # Left column: inputs and controls.
	            with gr.Column(scale=3):
	                gallery = gr.Gallery(
	                    value=gallery_items,
	                    label="Examples",
	                    columns=4,
	                    rows=2,
	                    height=220,
	                )
	                input_image = gr.Image(type="pil", label="Input", height=320)
	                focal_slider = gr.Slider(
	                    label="Focal length (pixels)",
	                    minimum=1,
	                    maximum=4000,
	                    step=1,
	                    value=1000,
	                )
	                info = gr.Markdown("Select an example or upload an image.")
	                run_btn = gr.Button("Run")

	            # Right column: results.
	            with gr.Column(scale=4):
	                output_image = gr.Image(type="numpy", label="Depth (visualized)", height=420)
	                output_stats = gr.Markdown("")

	        example_state = gr.State(path_strings)
	        source_state = gr.State("")

	        # The gallery handler is only registered when examples exist.
	        if examples:
	            gallery.select(
	                fn=select_example,
	                inputs=[example_state],
	                outputs=[input_image, focal_slider, info, source_state],
	            )

	        input_image.change(
	            fn=on_input_change,
	            inputs=[input_image, source_state],
	            outputs=[focal_slider, info, source_state],
	        )

	        run_btn.click(
	            fn=run_inference,
	            inputs=[input_image, focal_slider],
	            outputs=[output_image, output_stats],
	        )

	    return demo


	# Built at import time so the Blocks object is available as a module-level
	# name, not only when this file is run as a script.
	demo = build_demo()

	# Start the app only when this file is executed directly.
	if __name__ == "__main__":
	    demo.launch()