# InfiniDepth / app.py
# Hugging Face Space by ritianyu — "convert to ZeroGPU" (commit 24d5d34)
from __future__ import annotations
from functools import lru_cache
import os
from pathlib import Path
from typing import Optional
def _ensure_localhost_no_proxy() -> None:
hosts = ["127.0.0.1", "localhost", "::1"]
for key in ("NO_PROXY", "no_proxy"):
current = os.environ.get(key, "")
values = [value.strip() for value in current.split(",") if value.strip()]
changed = False
for host in hosts:
if host not in values:
values.append(host)
changed = True
if changed or not current:
os.environ[key] = ",".join(values)
_ensure_localhost_no_proxy()
def _ensure_hf_cache_dirs() -> None:
hf_home = os.environ.get("HF_HOME", "/tmp/huggingface")
hub_cache = os.environ.get("HF_HUB_CACHE", os.path.join(hf_home, "hub"))
assets_cache = os.environ.get("HF_ASSETS_CACHE", os.path.join(hf_home, "assets"))
os.environ["HF_HOME"] = hf_home
os.environ["HF_HUB_CACHE"] = hub_cache
os.environ["HF_ASSETS_CACHE"] = assets_cache
os.environ.setdefault("HUGGINGFACE_HUB_CACHE", hub_cache)
os.environ.setdefault("HF_HUB_DISABLE_XET", "1")
os.makedirs(hf_home, exist_ok=True)
os.makedirs(hub_cache, exist_ok=True)
os.makedirs(assets_cache, exist_ok=True)
_ensure_hf_cache_dirs()
import cv2
import gradio as gr
import numpy as np
import torch
from huggingface_hub import hf_hub_download
try:
    import spaces
except ImportError:
    # Outside a Hugging Face Space the `spaces` package is absent; provide a
    # stand-in whose GPU decorator is a no-op in both call styles.
    class _SpacesFallback:
        """Minimal replacement exposing only the `GPU` decorator."""

        @staticmethod
        def GPU(fn=None, **_kwargs):
            """Support bare `@spaces.GPU` and `@spaces.GPU(duration=...)`."""
            if callable(fn):
                return fn
            return lambda inner_fn: inner_fn

    spaces = _SpacesFallback()
from InfiniDepth.gs import GSPixelAlignPredictor, export_ply
from InfiniDepth.utils.gs_utils import (
_build_sparse_uniform_gaussians,
)
from InfiniDepth.utils.hf_demo_utils import (
DemoArtifacts,
ensure_session_output_dir,
export_point_cloud_assets,
preview_depth_file,
save_demo_artifacts,
scan_example_cases,
)
from InfiniDepth.utils.hf_gs_viewer import (
APP_TEMP_ROOT as GS_VIEWER_ROOT,
build_embedded_viewer_html,
build_viewer_error_html,
)
from InfiniDepth.utils.inference_utils import (
apply_sky_mask_to_depth,
build_camera_matrices,
build_scaled_intrinsics_matrix,
filter_gaussians_by_statistical_outlier,
prepare_metric_depth_inputs,
resolve_camera_intrinsics_for_inference,
resolve_output_size_from_mode,
run_optional_sampling_sky_mask,
run_optional_sky_mask,
unpack_gaussians_for_export,
)
from InfiniDepth.utils.io_utils import depth_to_disparity
from InfiniDepth.utils.model_utils import build_model
from InfiniDepth.utils.sampling_utils import SAMPLING_METHODS
# --- Filesystem layout -------------------------------------------------------
APP_ROOT = Path(__file__).resolve().parent
EXAMPLES_DIR = APP_ROOT / "example_data"
# Model input resolution as (height, width); cv2 later receives it reversed.
INPUT_SIZE = (768, 1024)
APP_NAME = "infinidepth-hf-demo"

# --- Task / model choices shown in the UI ------------------------------------
GS_TASK_CHOICE = "3DGS"
TASK_CHOICES = ["Depth", GS_TASK_CHOICE]
RGB_MODEL_TYPE = "InfiniDepth"
DEPTH_SENSOR_MODEL_TYPE = "InfiniDepth_DepthSensor"
MODEL_CHOICES = [RGB_MODEL_TYPE, DEPTH_SENSOR_MODEL_TYPE]
OUTPUT_MODE_CHOICES = ["upsample", "original", "specific"]

# --- Gaussian-splatting inference settings -----------------------------------
GS_SAMPLE_POINT_NUM = 2000000
GS_COORD_DETERMINISTIC_SAMPLING = True

# ZeroGPU time budgets (seconds) for the two GPU-decorated entry points.
DEPTH_GPU_DURATION_SECONDS = 180
GS_GPU_DURATION_SECONDS = 240

# Checkpoints are looked up locally first and fall back to the HF Hub repo
# below (see _resolve_repo_asset).
LOCAL_DEPTH_MODEL_PATHS = {
    "InfiniDepth": APP_ROOT / "checkpoints/depth/infinidepth.ckpt",
    "InfiniDepth_DepthSensor": APP_ROOT / "checkpoints/depth/infinidepth_depthsensor.ckpt",
}
LOCAL_GS_MODEL_PATHS = {
    "InfiniDepth": APP_ROOT / "checkpoints/gs/infinidepth_gs.ckpt",
    "InfiniDepth_DepthSensor": APP_ROOT / "checkpoints/gs/infinidepth_depthsensor_gs.ckpt",
}
HF_REPO_ID = "ritianyu/InfiniDepth"
HF_DEPTH_FILENAMES = {
    "InfiniDepth": "infinidepth.ckpt",
    "InfiniDepth_DepthSensor": "infinidepth_depthsensor.ckpt",
}
HF_GS_FILENAMES = {
    "InfiniDepth": "infinidepth_gs.ckpt",
    "InfiniDepth_DepthSensor": "infinidepth_depthsensor_gs.ckpt",
}
LOCAL_MOGE2_PATH = APP_ROOT / "checkpoints/moge-2-vitl-normal/model.pt"
HF_MOGE2_FILENAME = "moge2.pt"
LOCAL_SKYSEG_PATH = APP_ROOT / "checkpoints/sky/skyseg.onnx"
HF_SKYSEG_FILENAME = "skyseg.onnx"

# --- Example gallery state ---------------------------------------------------
EXAMPLE_CASES = scan_example_cases(EXAMPLES_DIR)
EXAMPLE_LOOKUP = {case.name: case for case in EXAMPLE_CASES}
DEFAULT_EXAMPLE_NAME = EXAMPLE_CASES[0].name if EXAMPLE_CASES else None
DEFAULT_EXAMPLE_INDEX = 0 if EXAMPLE_CASES else None
EXAMPLE_GALLERY_ITEMS = [(case.image_path, case.gallery_caption) for case in EXAMPLE_CASES]

# Ids used to flip the primary viewer tabs between point-cloud and GS output.
DEPTH_VIEW_TAB_ID = "pcd-viewer-tab"
GS_VIEW_TAB_ID = "gs-viewer-tab"
# Serve the embedded GS viewer's files as Gradio static assets.
gr.set_static_paths(paths=[str(GS_VIEWER_ROOT)])
# Custom CSS: keeps the three workspace columns flexible and gives the 3D /
# GS viewers and depth previews fixed, usable heights.
CSS = """
#top-workspace {
align-items: stretch;
}
#controls-column,
#inputs-column,
#outputs-column {
min-width: 0;
}
#example-gallery {
min-height: 280px;
}
#input-image {
min-height: 420px;
}
#input-depth-preview {
min-height: 240px;
}
#depth-model3d-viewer {
height: 700px;
}
#depth-model3d-viewer canvas,
#depth-model3d-viewer model-viewer,
#depth-model3d-viewer .wrap,
#depth-model3d-viewer .container {
height: 100% !important;
max-height: 100% !important;
}
#gs-viewer-html {
min-height: 748px;
padding-bottom: 0.75rem;
}
#gs-viewer-html iframe {
display: block;
width: 100%;
height: 700px !important;
min-height: 700px !important;
}
#depth-preview,
#depth-comparison,
#depth-color {
min-height: 260px;
}
"""
def _ensure_cuda() -> None:
    """Raise a user-facing error when no CUDA device is currently attached."""
    if torch.cuda.is_available():
        return
    raise gr.Error(
        "No CUDA device is available for this request. On Hugging Face ZeroGPU, "
        "GPU access is only attached while the decorated inference call is running."
    )
def _resolve_repo_asset(local_path: Path, filename: str) -> str:
if local_path.exists():
return str(local_path)
return hf_hub_download(
repo_id=HF_REPO_ID,
filename=filename,
)
@lru_cache(maxsize=2)
def _resolve_depth_checkpoint(model_type: str) -> str:
    """Resolve (and memoise) the depth checkpoint path for *model_type*."""
    local = LOCAL_DEPTH_MODEL_PATHS[model_type]
    return _resolve_repo_asset(local, HF_DEPTH_FILENAMES[model_type])
@lru_cache(maxsize=2)
def _resolve_gs_checkpoint(model_type: str) -> str:
    """Resolve (and memoise) the GS checkpoint path for *model_type*."""
    local = LOCAL_GS_MODEL_PATHS[model_type]
    return _resolve_repo_asset(local, HF_GS_FILENAMES[model_type])
@lru_cache(maxsize=1)
def _resolve_skyseg_path() -> str:
    """Resolve (and memoise) the sky-segmentation ONNX model path."""
    return _resolve_repo_asset(LOCAL_SKYSEG_PATH, HF_SKYSEG_FILENAME)
@lru_cache(maxsize=1)
def _resolve_moge2_source() -> str:
    """Resolve (and memoise) the MoGe-2 pretrained weights path."""
    return _resolve_repo_asset(LOCAL_MOGE2_PATH, HF_MOGE2_FILENAME)
@lru_cache(maxsize=1)
def _preload_repo_assets() -> tuple[str, ...]:
    """Resolve every model asset up front so the first inference is fast."""
    resolved: list[str] = []
    for model_type in MODEL_CHOICES:
        resolved.append(_resolve_depth_checkpoint(model_type))
    for model_type in MODEL_CHOICES:
        resolved.append(_resolve_gs_checkpoint(model_type))
    resolved.append(_resolve_moge2_source())
    resolved.append(_resolve_skyseg_path())
    return tuple(resolved)
@lru_cache(maxsize=2)
def _load_model(model_type: str):
    """Build (and memoise) the depth model for *model_type*; requires CUDA."""
    _ensure_cuda()
    checkpoint = _resolve_depth_checkpoint(model_type)
    return build_model(model_type=model_type, model_path=checkpoint)
@lru_cache(maxsize=4)
def _load_gs_predictor(model_type: str, dino_feature_dim: int):
    """Build (and memoise) the GS predictor matching the DINO feature width."""
    _ensure_cuda()
    predictor = GSPixelAlignPredictor(dino_feature_dim=dino_feature_dim)
    predictor = predictor.to(torch.device("cuda"))
    predictor.load_from_infinidepth_gs_checkpoint(_resolve_gs_checkpoint(model_type))
    predictor.eval()
    return predictor
def _to_optional_float(value: Optional[float]) -> Optional[float]:
if value in (None, ""):
return None
return float(value)
def _to_rgb_uint8(image: np.ndarray) -> np.ndarray:
image = np.asarray(image)
if image.ndim != 3 or image.shape[2] != 3:
raise gr.Error("Input image must be an RGB image.")
if image.dtype == np.uint8:
return image
if np.issubdtype(image.dtype, np.floating):
image = np.clip(image, 0.0, 1.0 if image.max() <= 1.0 else 255.0)
if image.max() <= 1.0:
image = image * 255.0
return image.astype(np.uint8)
return np.clip(image, 0, 255).astype(np.uint8)
def _prepare_image_tensors(image_rgb: np.ndarray) -> tuple[np.ndarray, torch.Tensor, tuple[int, int]]:
    """Resize the image to the network input and return (rgb, tensor, original hw)."""
    rgb = _to_rgb_uint8(image_rgb)
    original_h, original_w = rgb.shape[:2]
    # cv2.resize takes (width, height); INPUT_SIZE is stored as (height, width).
    resized = cv2.resize(rgb, INPUT_SIZE[::-1], interpolation=cv2.INTER_AREA)
    tensor = torch.from_numpy(resized).permute(2, 0, 1).unsqueeze(0).float() / 255.0
    return rgb, tensor, (original_h, original_w)
def _format_depth_status(
model_type: str,
metric_depth_source: str,
intrinsics_source: str,
output_hw: tuple[int, int],
depth_file: Optional[str],
) -> str:
depth_label = Path(depth_file).name if depth_file else "None"
return (
f"Task: `Depth`\n\n"
f"Model: `{model_type}`\n\n"
f"Input depth: `{depth_label}`\n\n"
f"Metric alignment source: `{metric_depth_source}`\n\n"
f"Camera intrinsics source: `{intrinsics_source}`\n\n"
f"Output size: `{output_hw[0]} x {output_hw[1]}`"
)
def _format_gs_status(
model_type: str,
metric_depth_source: str,
intrinsics_source: str,
depth_file: Optional[str],
gaussian_count: int,
) -> str:
depth_label = Path(depth_file).name if depth_file else "None"
return (
f"Task: `GS`\n\n"
f"Model: `{model_type}`\n\n"
f"Input depth: `{depth_label}`\n\n"
f"Metric alignment source: `{metric_depth_source}`\n\n"
f"Camera intrinsics source: `{intrinsics_source}`\n\n"
f"Exported gaussians: `{gaussian_count}`"
)
def _model_availability_note(depth_path: Optional[str], model_type: str, *, auto_switched: bool = False) -> str:
    """Describe which model types are currently usable given the depth input."""
    if not depth_path:
        if auto_switched:
            return (
                "No input depth loaded. Switched model back to `InfiniDepth`. "
                "Upload a depth file to enable `InfiniDepth_DepthSensor`."
            )
        return "No input depth loaded. `InfiniDepth` will be used until you upload a depth file."
    if auto_switched and model_type == DEPTH_SENSOR_MODEL_TYPE:
        return (
            "Depth file loaded. Switched model to `InfiniDepth_DepthSensor`. "
            "You can still switch back to `InfiniDepth` for RGB-only inference."
        )
    return (
        "Depth file loaded. `InfiniDepth_DepthSensor` is available. "
        "You can also keep `InfiniDepth` for RGB-only inference."
    )
def _compose_depth_info_message(base_message: str, note: str) -> str:
return f"{base_message}\n\n{note}" if note else base_message
def _load_example_image(example_name: str) -> tuple[np.ndarray, Optional[str], Optional[np.ndarray], str, str]:
    """Load an example case's RGB image (plus paired depth when present).

    Returns the values for (input image, depth file path, depth preview,
    model type radio, info markdown).
    """
    if not example_name:
        raise gr.Error("Select an example case first.")
    case = EXAMPLE_LOOKUP[example_name]
    image_bgr = cv2.imread(case.image_path, cv2.IMREAD_COLOR)
    if image_bgr is None:
        raise gr.Error(f"Failed to load example image: {case.image_path}")
    image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
    depth_path = case.depth_path
    if depth_path is None:
        # RGB-only example: keep the default model and no depth preview.
        detail = _compose_depth_info_message(
            f"Loaded example `{case.name}`.",
            _model_availability_note(None, RGB_MODEL_TYPE),
        )
        return image_rgb, depth_path, None, RGB_MODEL_TYPE, detail
    # Paired depth: preview it and auto-switch to the depth-sensor model.
    preview, depth_msg = preview_depth_file(depth_path)
    detail = _compose_depth_info_message(
        f"Loaded example `{case.name}` with paired depth. {depth_msg}",
        _model_availability_note(depth_path, DEPTH_SENSOR_MODEL_TYPE, auto_switched=True),
    )
    return image_rgb, depth_path, preview, DEPTH_SENSOR_MODEL_TYPE, detail
def _selected_example_message(example_name: Optional[str]) -> str:
    """Build the caption describing the currently selected example."""
    case = EXAMPLE_LOOKUP.get(example_name) if example_name else None
    if case is None:
        return "Select an example thumbnail, then click `Load Example`."
    mode_label = "RGB + depth" if case.has_depth else "RGB only"
    return f"Selected example: `{case.name}` ({mode_label})"
def _select_example(evt: gr.SelectData):
    """Map a gallery selection event to (example name, caption markdown)."""
    if not EXAMPLE_CASES or evt.index is None:
        return None, _selected_example_message(None)
    # Gallery events may deliver the index as a scalar or a (row, col) pair.
    raw_index = evt.index
    if isinstance(raw_index, (tuple, list)):
        raw_index = raw_index[0]
    case = EXAMPLE_CASES[int(raw_index)]
    return case.name, _selected_example_message(case.name)
def _primary_view_for_task(task_type: str):
    """Choose which primary viewer tab should be active for *task_type*."""
    if task_type == GS_TASK_CHOICE:
        return gr.update(selected=GS_VIEW_TAB_ID)
    return gr.update(selected=DEPTH_VIEW_TAB_ID)
def _reset_uploaded_image_state(
    _image: Optional[np.ndarray],
    depth_path: Optional[str],
) -> tuple[None, None, str, str]:
    """Clear depth-related state after a fresh image upload and explain why."""
    if depth_path:
        note = (
            "Image updated. Cleared the previous depth file. Upload a new depth file to enable "
            "`InfiniDepth_DepthSensor`."
        )
    else:
        note = "Image updated. Upload a depth file to enable `InfiniDepth_DepthSensor`."
    return None, None, RGB_MODEL_TYPE, note
def _update_depth_preview(depth_path: Optional[str], model_type: str) -> tuple[Optional[np.ndarray], str, str]:
    """Refresh the depth preview and auto-select the matching model type."""
    try:
        preview, depth_msg = preview_depth_file(depth_path)
    except Exception as exc:
        raise gr.Error(f"Failed to preview depth file: {exc}") from exc
    # Depth present -> depth-sensor model; absent -> RGB-only model. Either
    # way, flag an auto-switch when the selection actually changes.
    next_model = DEPTH_SENSOR_MODEL_TYPE if depth_path else RGB_MODEL_TYPE
    note = _model_availability_note(depth_path, next_model, auto_switched=(model_type != next_model))
    return preview, next_model, _compose_depth_info_message(depth_msg, note)
def _settings_visibility(task_type: str, output_resolution_mode: str):
    """Compute visibility updates for the depth-only settings widgets."""
    is_depth = task_type == "Depth"
    show_specific = is_depth and output_resolution_mode == "specific"
    show_upsample = is_depth and output_resolution_mode == "upsample"
    return (
        gr.update(visible=is_depth),
        gr.update(visible=show_upsample),
        gr.update(visible=show_specific),
        gr.update(visible=show_specific),
        gr.update(visible=is_depth),
    )
def _normalize_filtered_gaussians(filtered_result):
if isinstance(filtered_result, tuple):
return filtered_result[0]
return filtered_result
@spaces.GPU(duration=DEPTH_GPU_DURATION_SECONDS)
@torch.no_grad()
def _run_depth_inference(
    image: np.ndarray,
    depth_file: Optional[str],
    model_type: str,
    output_resolution_mode: str,
    upsample_ratio: int,
    specific_height: int,
    specific_width: int,
    enable_skyseg_model: bool,
    filter_point_cloud: bool,
    fx_org: Optional[float],
    fy_org: Optional[float],
    cx_org: Optional[float],
    cy_org: Optional[float],
    request: gr.Request,
) -> tuple:
    """Run depth inference and export preview images plus point-cloud assets.

    Returns a tuple matching the Gradio outputs list: (status markdown,
    comparison image path, colorized depth path, grayscale depth path, GLB
    path for the 3D viewer, downloadable files, None, None). The trailing
    Nones correspond to the GS viewer HTML / GS downloads, which this task
    does not produce.
    """
    _ensure_cuda()
    if image is None:
        raise gr.Error("Upload an image or load an example before running inference.")
    if model_type == DEPTH_SENSOR_MODEL_TYPE and not depth_file:
        raise gr.Error("InfiniDepth_DepthSensor requires an input depth file.")
    # Resolve the sky-segmentation model up front only when requested.
    skyseg_path = _resolve_skyseg_path() if enable_skyseg_model else None
    image_rgb, image_tensor, (org_h, org_w) = _prepare_image_tensors(image)
    device = torch.device("cuda")
    image_tensor = image_tensor.to(device)
    model = _load_model(model_type)
    # Metric-alignment inputs: the user-supplied depth file when available,
    # otherwise MoGe-2 predictions (which may also yield intrinsics).
    gt_depth, prompt_depth, gt_depth_mask, use_gt_depth, moge2_intrinsics = prepare_metric_depth_inputs(
        input_depth_path=depth_file,
        input_size=INPUT_SIZE,
        image=image_tensor,
        device=device,
        moge2_pretrained=_resolve_moge2_source(),
    )
    # The model consumes disparity, not depth.
    gt_disp = depth_to_disparity(gt_depth)
    prompt_disp = depth_to_disparity(prompt_depth)
    # Intrinsics priority: user-entered values, then MoGe-2 estimates
    # (exact fallback order decided inside the helper).
    fx_org, fy_org, cx_org, cy_org, intrinsics_source = resolve_camera_intrinsics_for_inference(
        fx_org=_to_optional_float(fx_org),
        fy_org=_to_optional_float(fy_org),
        cx_org=_to_optional_float(cx_org),
        cy_org=_to_optional_float(cy_org),
        org_h=org_h,
        org_w=org_w,
        image=image_tensor,
        moge2_pretrained=_resolve_moge2_source(),
        moge2_intrinsics=moge2_intrinsics,
    )
    _, _, h, w = image_tensor.shape
    # Rescale intrinsics from the original image size to the network input.
    fx, fy, cx, cy, _ = build_scaled_intrinsics_matrix(
        fx_org=fx_org,
        fy_org=fy_org,
        cx_org=cx_org,
        cy_org=cy_org,
        org_h=org_h,
        org_w=org_w,
        h=h,
        w=w,
        device=image_tensor.device,
    )
    sky_mask = run_optional_sky_mask(
        image=image_tensor,
        enable_skyseg_model=enable_skyseg_model,
        sky_model_ckpt_path=skyseg_path or str(LOCAL_SKYSEG_PATH),
    )
    h_out, w_out = resolve_output_size_from_mode(
        output_resolution_mode=output_resolution_mode,
        org_h=org_h,
        org_w=org_w,
        h=h,
        w=w,
        output_size=(int(specific_height), int(specific_width)),
        upsample_ratio=int(upsample_ratio),
    )
    # Query every output pixel on a regular 2D grid.
    query_2d_uniform_coord = SAMPLING_METHODS["2d_uniform"]((h_out, w_out)).unsqueeze(0).to(device)
    pred_2d_uniform_depth, _ = model.inference(
        image=image_tensor,
        query_coord=query_2d_uniform_coord,
        gt_depth=gt_disp,
        gt_depth_mask=gt_depth_mask,
        prompt_depth=prompt_disp,
        prompt_mask=prompt_disp > 0,
    )
    # Reshape flat per-query depths back into a (1, 1, H, W) depth map.
    pred_depthmap = pred_2d_uniform_depth.permute(0, 2, 1).reshape(1, 1, h_out, w_out)
    # Sky pixels are clamped to a fixed far value (200.0 — presumably meters;
    # NOTE(review): confirm the unit against apply_sky_mask_to_depth).
    pred_depthmap, pred_2d_uniform_depth = apply_sky_mask_to_depth(
        pred_depthmap=pred_depthmap,
        pred_2d_uniform_depth=pred_2d_uniform_depth,
        sky_mask=sky_mask,
        h_sample=h_out,
        w_sample=w_out,
        sky_depth_value=200.0,
    )
    # Write artifacts into a per-session output directory.
    session_hash = getattr(request, "session_hash", None)
    output_dir = ensure_session_output_dir(APP_NAME, session_hash)
    pred_depth_np = pred_depthmap.squeeze(0).squeeze(0).detach().cpu().numpy().astype(np.float32)
    artifacts = save_demo_artifacts(image_rgb=image_rgb, pred_depth=pred_depth_np, output_dir=output_dir)
    ply_path, glb_path = export_point_cloud_assets(
        sampled_coord=query_2d_uniform_coord.squeeze(0).cpu(),
        sampled_depth=pred_2d_uniform_depth.squeeze(0).squeeze(-1).cpu(),
        rgb_image=image_tensor.squeeze(0).cpu(),
        fx=fx,
        fy=fy,
        cx=cx,
        cy=cy,
        output_dir=output_dir,
        filter_flying_points=filter_point_cloud,
    )
    # Rebuild the artifacts record so it also carries the point-cloud paths.
    artifacts = DemoArtifacts(
        comparison_path=artifacts.comparison_path,
        color_depth_path=artifacts.color_depth_path,
        gray_depth_path=artifacts.gray_depth_path,
        raw_depth_path=artifacts.raw_depth_path,
        ply_path=ply_path,
        glb_path=glb_path,
    )
    metric_depth_source = "user depth" if use_gt_depth and depth_file else "MoGe-2"
    status = _format_depth_status(
        model_type=model_type,
        metric_depth_source=metric_depth_source,
        intrinsics_source=intrinsics_source,
        output_hw=(h_out, w_out),
        depth_file=depth_file,
    )
    return (
        status,
        artifacts.comparison_path,
        artifacts.color_depth_path,
        artifacts.gray_depth_path,
        glb_path,
        artifacts.download_files(),
        None,
        None,
    )
@spaces.GPU(duration=GS_GPU_DURATION_SECONDS)
@torch.no_grad()
def _run_gs_inference(
    image: np.ndarray,
    depth_file: Optional[str],
    model_type: str,
    enable_skyseg_model: bool,
    fx_org: Optional[float],
    fy_org: Optional[float],
    cx_org: Optional[float],
    cy_org: Optional[float],
    request: gr.Request,
) -> tuple:
    """Run 3D Gaussian Splatting inference and export a viewable PLY.

    Returns a tuple matching the Gradio outputs list: (status markdown,
    then five Nones for the depth-only outputs, the embedded GS viewer HTML,
    and the downloadable GS files).
    """
    _ensure_cuda()
    if image is None:
        raise gr.Error("Upload an image or load an example before running inference.")
    if model_type == DEPTH_SENSOR_MODEL_TYPE and not depth_file:
        raise gr.Error("InfiniDepth_DepthSensor requires an input depth file for GS inference.")
    image_rgb, image_tensor, (org_h, org_w) = _prepare_image_tensors(image)
    # The raw RGB array is not needed for GS export; free it right away.
    del image_rgb
    device = torch.device("cuda")
    image_tensor = image_tensor.to(device)
    model = _load_model(model_type)
    # Metric-alignment inputs: user depth when available, else MoGe-2.
    gt_depth, prompt_depth, gt_depth_mask, use_gt_depth, moge2_intrinsics = prepare_metric_depth_inputs(
        input_depth_path=depth_file,
        input_size=INPUT_SIZE,
        image=image_tensor,
        device=device,
        moge2_pretrained=_resolve_moge2_source(),
    )
    # The model consumes disparity, not depth.
    gt_disp = depth_to_disparity(gt_depth)
    prompt_disp = depth_to_disparity(prompt_depth)
    fx_org, fy_org, cx_org, cy_org, intrinsics_source = resolve_camera_intrinsics_for_inference(
        fx_org=_to_optional_float(fx_org),
        fy_org=_to_optional_float(fy_org),
        cx_org=_to_optional_float(cx_org),
        cy_org=_to_optional_float(cy_org),
        org_h=org_h,
        org_w=org_w,
        image=image_tensor,
        moge2_pretrained=_resolve_moge2_source(),
        moge2_intrinsics=moge2_intrinsics,
    )
    b, _, h, w = image_tensor.shape
    _, _, _, _, intrinsics, extrinsics = build_camera_matrices(
        fx_org=fx_org,
        fy_org=fy_org,
        cx_org=cx_org,
        cy_org=cy_org,
        org_h=org_h,
        org_w=org_w,
        h=h,
        w=w,
        batch=b,
        device=device,
    )
    # NOTE(review): unlike _run_depth_inference (which passes None when sky
    # segmentation is disabled), this always resolves a path; presumably the
    # helper ignores it when enable_skyseg_model is False — confirm.
    skyseg_path = _resolve_skyseg_path() if enable_skyseg_model else str(LOCAL_SKYSEG_PATH)
    sky_mask = run_optional_sampling_sky_mask(
        image=image_tensor,
        enable_skyseg_model=enable_skyseg_model,
        sky_model_ckpt_path=skyseg_path,
        dilate_px=0,
    )
    # Depth + DINO features + 3D-uniform query samples feeding the GS head.
    depthmap, dino_tokens, query_3d_uniform_coord, pred_depth_3d = model.inference_for_gs(
        image=image_tensor,
        intrinsics=intrinsics,
        gt_depth=gt_disp,
        gt_depth_mask=gt_depth_mask,
        prompt_depth=prompt_disp,
        prompt_mask=prompt_disp > 0,
        sky_mask=sky_mask,
        sample_point_num=GS_SAMPLE_POINT_NUM,
        coord_deterministic_sampling=GS_COORD_DETERMINISTIC_SAMPLING,
    )
    if query_3d_uniform_coord is None or pred_depth_3d is None:
        raise gr.Error("GS inference did not return 3D-uniform query outputs.")
    # Predictor cache is keyed on the DINO feature width as well.
    gs_predictor = _load_gs_predictor(model_type, int(dino_tokens.shape[-1]))
    dense_gaussians = gs_predictor(
        image=image_tensor,
        depthmap=depthmap,
        dino_tokens=dino_tokens,
        intrinsics=intrinsics,
        extrinsics=extrinsics,
    )
    pixel_gaussians = _build_sparse_uniform_gaussians(
        dense_gaussians=dense_gaussians,
        query_3d_uniform_coord=query_3d_uniform_coord,
        pred_depth_3d=pred_depth_3d,
        intrinsics=intrinsics,
        extrinsics=extrinsics,
        h=h,
        w=w,
    )
    # Remove statistical outliers; the helper may return (gaussians, ...).
    pixel_gaussians = _normalize_filtered_gaussians(filter_gaussians_by_statistical_outlier(pixel_gaussians))
    gaussian_count = int(pixel_gaussians.means.shape[1])
    if gaussian_count == 0:
        raise gr.Error("No valid gaussians remained after filtering.")
    means, harmonics, opacities, scales, rotations = unpack_gaussians_for_export(pixel_gaussians)
    # Write the PLY into a per-session output directory.
    session_hash = getattr(request, "session_hash", None)
    output_dir = ensure_session_output_dir(APP_NAME, session_hash)
    ply_path = output_dir / "gaussians.ply"
    export_ply(
        means=means,
        harmonics=harmonics,
        opacities=opacities,
        path=ply_path,
        scales=scales,
        rotations=rotations,
        focal_length_px=(fx_org, fy_org),
        principal_point_px=(cx_org, cy_org),
        image_shape=(org_h, org_w),
        extrinsic_matrix=extrinsics[0],
    )
    # Viewer construction is best-effort: fall back to an error page so the
    # run still completes and the PLY remains downloadable.
    try:
        gs_viewer_html = build_embedded_viewer_html(ply_path)
    except Exception as exc:
        print(f"[Warning] Failed to build embedded GS viewer: {exc}")
        gs_viewer_html = build_viewer_error_html(str(exc), ply_path)
    metric_depth_source = "user depth" if use_gt_depth and depth_file else "MoGe-2"
    status = _format_gs_status(
        model_type=model_type,
        metric_depth_source=metric_depth_source,
        intrinsics_source=intrinsics_source,
        depth_file=depth_file,
        gaussian_count=gaussian_count,
    )
    download_files = [str(ply_path)]
    return (
        status,
        None,
        None,
        None,
        None,
        None,
        gs_viewer_html,
        download_files,
    )
def _run_inference(
    task_type: str,
    image: np.ndarray,
    depth_file: Optional[str],
    model_type: str,
    output_resolution_mode: str,
    upsample_ratio: int,
    specific_height: int,
    specific_width: int,
    enable_skyseg_model: bool,
    filter_point_cloud: bool,
    fx_org: Optional[float],
    fy_org: Optional[float],
    cx_org: Optional[float],
    cy_org: Optional[float],
    request: gr.Request,
):
    """Dispatch to the depth or GS pipeline based on the selected task."""
    if task_type != GS_TASK_CHOICE:
        # Default path: plain depth inference with full resolution controls.
        return _run_depth_inference(
            image=image,
            depth_file=depth_file,
            model_type=model_type,
            output_resolution_mode=output_resolution_mode,
            upsample_ratio=upsample_ratio,
            specific_height=specific_height,
            specific_width=specific_width,
            enable_skyseg_model=enable_skyseg_model,
            filter_point_cloud=filter_point_cloud,
            fx_org=fx_org,
            fy_org=fy_org,
            cx_org=cx_org,
            cy_org=cy_org,
            request=request,
        )
    # GS inference ignores the depth-only resolution/filter controls.
    return _run_gs_inference(
        image=image,
        depth_file=depth_file,
        model_type=model_type,
        enable_skyseg_model=enable_skyseg_model,
        fx_org=fx_org,
        fy_org=fy_org,
        cx_org=cx_org,
        cy_org=cy_org,
        request=request,
    )
def _clear_outputs():
return "", None, None, None, None, None, "", None
# --- UI layout and event wiring ---------------------------------------------
with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
    gr.Markdown("# InfiniDepth Demo")
    gr.Markdown(
        "Switch between depth inference and GS inference. `InfiniDepth` works with RGB-only inputs, while `InfiniDepth_DepthSensor` is enabled only after you upload a depth file or load an example with paired depth."
    )
    # Name of the example currently highlighted in the gallery.
    selected_example_name = gr.State(DEFAULT_EXAMPLE_NAME)
    with gr.Row(elem_id="top-workspace"):
        # Left column: task/model selection, example gallery, settings.
        with gr.Column(scale=4, min_width=320, elem_id="controls-column"):
            task_type = gr.Radio(label="Inference Task", choices=TASK_CHOICES, value="Depth")
            model_type = gr.Radio(label="Model Type", choices=MODEL_CHOICES, value=RGB_MODEL_TYPE)
            gr.Markdown("### Example Data")
            example_gallery = gr.Gallery(
                value=EXAMPLE_GALLERY_ITEMS,
                label="Example Data",
                show_label=False,
                columns=2,
                height=280,
                object_fit="cover",
                allow_preview=False,
                selected_index=DEFAULT_EXAMPLE_INDEX,
                elem_id="example-gallery",
            )
            example_selection = gr.Markdown(_selected_example_message(DEFAULT_EXAMPLE_NAME))
            load_example_btn = gr.Button("Load Example")
            # Depth-only settings; visibility is driven by _settings_visibility.
            with gr.Accordion("Depth Settings", open=True):
                output_resolution_mode = gr.Dropdown(
                    label="Output Resolution Mode",
                    choices=OUTPUT_MODE_CHOICES,
                    value="upsample",
                )
                upsample_ratio = gr.Slider(label="Upsample Ratio", minimum=1, maximum=4, step=1, value=1)
                specific_height = gr.Number(label="Specific Height", value=INPUT_SIZE[0], precision=0, visible=False)
                specific_width = gr.Number(label="Specific Width", value=INPUT_SIZE[1], precision=0, visible=False)
                enable_skyseg_model = gr.Checkbox(label="Apply Sky Mask", value=False)
                filter_point_cloud = gr.Checkbox(label="Filter Flying Points", value=True)
            # Blank fields mean "estimate automatically".
            with gr.Accordion("Optional Camera Intrinsics", open=False):
                fx_org = gr.Textbox(label="fx", value="", placeholder="auto")
                fy_org = gr.Textbox(label="fy", value="", placeholder="auto")
                cx_org = gr.Textbox(label="cx", value="", placeholder="auto")
                cy_org = gr.Textbox(label="cy", value="", placeholder="auto")
        # Middle column: user inputs.
        with gr.Column(scale=5, min_width=360, elem_id="inputs-column"):
            input_image = gr.Image(
                label="Input Image",
                image_mode="RGB",
                type="numpy",
                sources=["upload", "clipboard", "webcam"],
                height=420,
                elem_id="input-image",
            )
            input_depth_file = gr.File(
                label="Optional Depth File",
                type="filepath",
                file_types=[".png", ".npy", ".npz", ".h5", ".hdf5", ".exr"],
            )
            input_depth_preview = gr.Image(
                label="Input Depth Preview",
                type="numpy",
                height=240,
                elem_id="input-depth-preview",
            )
            depth_info = gr.Markdown("No input depth loaded. `InfiniDepth` will be used until you upload a depth file.")
            submit_btn = gr.Button("Run Inference", variant="primary")
        # Right column: viewers and downloadable results.
        with gr.Column(scale=8, min_width=640, elem_id="outputs-column"):
            status_output = gr.Markdown()
            # Primary viewers: point cloud for Depth, embedded viewer for GS.
            with gr.Tabs(selected=DEPTH_VIEW_TAB_ID, elem_id="primary-view-tabs") as primary_view_tabs:
                with gr.Tab("PCD Viewer", id=DEPTH_VIEW_TAB_ID, render_children=True):
                    depth_model_3d = gr.Model3D(
                        label="Point Cloud Viewer",
                        display_mode="solid",
                        clear_color=[1.0, 1.0, 1.0, 1.0],
                        height=700,
                        elem_id="depth-model3d-viewer",
                    )
                with gr.Tab("GS Viewer", id=GS_VIEW_TAB_ID, render_children=True):
                    gs_viewer_html = gr.HTML(elem_id="gs-viewer-html")
            with gr.Tabs(elem_id="secondary-output-tabs"):
                with gr.Tab("Depth Analysis", render_children=True):
                    depth_comparison = gr.Image(
                        label="RGB vs Depth",
                        type="filepath",
                        height=280,
                        elem_id="depth-comparison",
                    )
                    with gr.Row():
                        color_depth = gr.Image(
                            label="Colorized Depth",
                            type="filepath",
                            height=260,
                            elem_id="depth-color",
                        )
                        gray_depth = gr.Image(
                            label="Grayscale Depth",
                            type="filepath",
                            height=260,
                            elem_id="depth-preview",
                        )
                with gr.Tab("Downloads", render_children=True):
                    with gr.Row():
                        depth_download_files = gr.File(label="Depth Files", type="filepath")
                        gs_download_files = gr.File(label="GS Files", type="filepath")
    # Switching task toggles the depth-only settings and the active viewer tab.
    task_type.change(
        fn=_settings_visibility,
        inputs=[task_type, output_resolution_mode],
        outputs=[output_resolution_mode, upsample_ratio, specific_height, specific_width, filter_point_cloud],
    )
    task_type.change(
        fn=_primary_view_for_task,
        inputs=[task_type],
        outputs=[primary_view_tabs],
    )
    output_resolution_mode.change(
        fn=_settings_visibility,
        inputs=[task_type, output_resolution_mode],
        outputs=[output_resolution_mode, upsample_ratio, specific_height, specific_width, filter_point_cloud],
    )
    example_gallery.select(
        fn=_select_example,
        outputs=[selected_example_name, example_selection],
    )
    # A fresh image upload invalidates the previously loaded depth file.
    input_image.input(
        fn=_reset_uploaded_image_state,
        inputs=[input_image, input_depth_file],
        outputs=[input_depth_file, input_depth_preview, model_type, depth_info],
    )
    input_depth_file.change(
        fn=_update_depth_preview,
        inputs=[input_depth_file, model_type],
        outputs=[input_depth_preview, model_type, depth_info],
    )
    load_example_btn.click(
        fn=_load_example_image,
        inputs=[selected_example_name],
        outputs=[input_image, input_depth_file, input_depth_preview, model_type, depth_info],
    )
    # Submit: focus the right viewer tab, clear stale outputs, then run.
    submit_btn.click(
        fn=_primary_view_for_task,
        inputs=[task_type],
        outputs=[primary_view_tabs],
    ).then(
        fn=_clear_outputs,
        outputs=[
            status_output,
            depth_comparison,
            color_depth,
            gray_depth,
            depth_model_3d,
            depth_download_files,
            gs_viewer_html,
            gs_download_files,
        ],
    ).then(
        fn=_run_inference,
        inputs=[
            task_type,
            input_image,
            input_depth_file,
            model_type,
            output_resolution_mode,
            upsample_ratio,
            specific_height,
            specific_width,
            enable_skyseg_model,
            filter_point_cloud,
            fx_org,
            fy_org,
            cx_org,
            cy_org,
        ],
        outputs=[
            status_output,
            depth_comparison,
            color_depth,
            gray_depth,
            depth_model_3d,
            depth_download_files,
            gs_viewer_html,
            gs_download_files,
        ],
    )
if __name__ == "__main__":
    # Warm the checkpoint cache before the UI starts serving requests.
    _preload_repo_assets()
    demo.queue().launch()