import glob
import gradio as gr
import numpy as np
import spaces
import torch
import tempfile
import uuid
from huggingface_hub import hf_hub_download
from PIL import Image, ImageOps, ImageEnhance
from pathlib import Path
from zipfile import ZipFile, is_zipfile
from pypdf import PdfReader
from depth_anything_v2.dpt import DepthAnythingV2
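
# CSS tweaks: cap the input/output image heights and letterbox gallery thumbnails.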
css = """
#img-display-container {
max-height: 100vh;
}
#img-display-input {
max-height: 80vh;
}
#img-display-output {
max-height: 80vh;
}
#download {
height: 62px;
}
.thumbnail-item {
aspect-ratio: var(--ratio-wide)
}
.thumbnail-item img {
object-fit: contain
}
"""
head = """
<script type="module">
import { BridgeClient, RGBDHologram } from "/file=assets/looking-glass-bridge.js";
window.BridgeClient = BridgeClient;
window.RGBDHologram = RGBDHologram;
window.updating = false;
window.settings = {
depthiness: 1.0,
focus: 0,
aspect: 1,
chroma_depth: 0,
depth_inversion: 0,
depth_loc: 2,
depth_cutoff: 1,
zoom: 1,
crop_pos_x: 0,
crop_pos_y: 0,
};
window.castHologram = async function(gallery) {
if (gallery.length == 0)
return;
const selected = document.querySelector('#img-display-output .thumbnail-item.selected img');
const uri = selected ? selected.src : gallery[0].image;
if (!uri)
return;
const Bridge = BridgeClient.getInstance();
if (!Bridge.isConnected)
await Bridge.connect();
await Bridge.getDisplays();
if (Bridge.isCastPending)
return;
const rgbd = new RGBDHologram({ uri, settings });
await Bridge.cast(rgbd);
};
window.updateHologram = async function(value, parameter) {
settings[parameter] = value;
const Bridge = BridgeClient.getInstance();
if (!Bridge.isConnected || window.updating)
return;
const name = Bridge.getCurrentPlaylist().name;
window.updating = true;
await Bridge.updateCurrentHologram({ name, parameter, value });
window.updating = false;
};
</script>
"""
DEVICE = 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu'
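
# DPT decoder configuration for each Depth Anything V2 encoder size.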
model_configs = {
'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]}
}
encoder2name = {
'vits': 'Small',
'vitb': 'Base',
'vitl': 'Large',
'vitg': 'Giant', # we are undergoing company review procedures to release our giant model checkpoint
}
title = "# Depth Anything V2"
description = """Looking Glass demo for **Depth Anything V2**.
Please refer to our [paper](https://arxiv.org/abs/2406.09414), [project page](https://depth-anything-v2.github.io), or [github](https://github.com/DepthAnything/Depth-Anything-V2) for more details."""
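
# Estimate depth and return an RGBD image: the source on the left, its
# normalized grayscale depth map on the right.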
@spaces.GPU
def predict_depth(image, model):
w, h = image.size
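    # The model expects a BGR array (cv2 convention), hence the channel flip;
    # the predicted depth is then normalized to the 0-255 range.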
depth = model.infer_image(np.array(image.convert("RGB"))[:, :, ::-1])
depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0
depth = depth.astype(np.uint8)
gray_depth = Image.fromarray(depth)
rgbd = Image.new(image.mode, (w * 2, h))
rgbd.paste(image, (0, 0))
rgbd.paste(gray_depth, (w, 0))
return rgbd
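
# Flatten transparency onto the gradient background (when requested) and
# optionally upscale or denoise the image with waifu2x before depth estimation.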
@spaces.GPU
def upscale_image(image, model, background, discard_alpha):
if image.mode == "RGBA":
if discard_alpha:
            image = Image.alpha_composite(ImageOps.pad(background, image.size, color=(0, 0, 0)), image)
elif image.mode != "RGB":
image = image.convert("RGB")
if model is not None:
image = model.infer(image)
return image.convert("RGB") if discard_alpha else image
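
# Main pipeline: load the selected Depth Anything V2 checkpoint (plus a
# waifu2x superresolution model if an upscale method was chosen), then produce
# RGBD images for a single upload, a batch of files, or a PDF/ZIP document.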
@spaces.GPU
def on_submit(image, batch_images, book, encoder, upscale_model, upscale_method, denoise_level, discard_alpha, progress=gr.Progress()):
model_name = encoder2name[encoder]
model = DepthAnythingV2(**model_configs[encoder])
filepath = hf_hub_download(repo_id=f"depth-anything/Depth-Anything-V2-{model_name}", filename=f"depth_anything_v2_{encoder}.pth", repo_type="model")
state_dict = torch.load(filepath, map_location="cpu")
model.load_state_dict(state_dict)
model = model.to(DEVICE).eval()
superresolution = None
if upscale_method is not None:
superresolution = torch.hub.load("nagadomi/nunif:master", "waifu2x",
model_type=upscale_model, method=upscale_method, noise_level=denoise_level,
keep_alpha=not discard_alpha, trust_repo=True).to(DEVICE)
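    # Soft radial background (bright center fading to dark edges) composited
    # behind transparent images when the alpha channel is discarded.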
gradient = ImageEnhance.Brightness(Image.radial_gradient("L"))
background = ImageOps.invert(gradient.enhance(1.5)).convert("RGBA")
result = []
if image is not None:
image = upscale_image(image, superresolution, background, discard_alpha)
result.append((predict_depth(image, model), None))
if batch_images is not None:
for path in progress.tqdm(batch_images):
with Image.open(path) as img:
img = upscale_image(img, superresolution, background, discard_alpha)
result.append((predict_depth(img, model), Path(path).name))
if book is not None:
if is_zipfile(book):
with ZipFile(book, "r") as zf:
                for entry in progress.tqdm(zf.infolist()):
                    # Skip directory entries, which cannot be opened as images.
                    if entry.is_dir():
                        continue
                    with zf.open(entry) as file, Image.open(file) as img:
                        img = upscale_image(img, superresolution, background, discard_alpha)
                        result.append((predict_depth(img, model), entry.filename))
else:
reader = PdfReader(book)
for page in progress.tqdm(reader.pages):
for image_file_object in page.images:
img = upscale_image(image_file_object.image, superresolution, background, discard_alpha)
result.append((predict_depth(img, model), image_file_object.name))
return result
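
# Bundle the gallery into a ZIP for download; a single image is offered as-is.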
def zip_gallery(gallery, progress=gr.Progress()):
if gallery is None:
return None
if len(gallery) == 1:
return gallery[0][0]
temp = Path(tempfile.gettempdir()) / uuid.uuid4().hex
    zip_path = temp.with_suffix(".zip")  # avoid shadowing the zip() builtin
    with ZipFile(zip_path, "w") as zf:
        for index, image in progress.tqdm(enumerate(gallery)):
            fn = Path(image[0]).name if image[1] is None else Path(image[1]).with_suffix(".rgbd.png")
            zf.write(image[0], "{:02d}_{}".format(index, fn))
    return zip_path
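
# Serve the bundled assets/ directory (Bridge script, examples) as static files.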
gr.set_static_paths(paths=[Path.cwd().absolute()/"assets"])
with gr.Blocks(css=css, head=head) as demo:
gr.Markdown(title)
gr.Markdown(description)
with gr.Row():
with gr.Column():
with gr.Tab("Single Image"):
input_image = gr.Image(
label="Input Image",
elem_id='img-display-input',
type='pil',
image_mode=None
)
with gr.Tab("Batch Mode"):
batch_images = gr.File(
label="Input Images",
file_types=["image"],
file_count="multiple"
)
with gr.Tab("Document Mode"):
book = gr.File(
label="PDF/ZIP Document",
file_types=[".pdf", ".zip"],
)
with gr.Row():
clear = gr.ClearButton(components=[input_image, batch_images, book])
submit = gr.Button(value="Compute Depth", variant="primary")
model_size = gr.Radio(
label="Model Size",
choices=[('Small', 'vits'), ('Base', 'vitb'), ('Large', 'vitl')],
value="vitl"
)
upscale_method = gr.Radio(
label="Upscale Method",
choices=[("No Upscaling or Denoising", None), ("Denoise Only", "noise"), ("2x Upscaling", "scale2x"), ("4x Upscaling", "scale4x")]
)
upscale_model = gr.Dropdown(
choices=["art", "art_scan", "photo", "swin_unet/art", "swin_unet/art_scan", "swin_unet/photo", "cunet/art", "upconv_7/art", "upconv_7/photo"],
label="Upscaling Model",
value="art"
)
denoise_level = gr.Slider(
label="Denoise Level (-1 = None)",
value=0,
step=1,
minimum=-1,
maximum=4
)
discard_alpha = gr.Checkbox(label="Add radial gradient background to transparent images", value=True)
with gr.Column():
with gr.Tab("Result"):
gallery = gr.Gallery(
label="RGBD Images",
elem_id='img-display-output',
format="png",
columns=4,
object_fit="contain",
preview=True,
interactive=True
)
                download_btn = gr.DownloadButton(elem_id="download")
depthiness = gr.Slider(
label="Depthiness",
elem_id="depthiness",
interactive=True,
minimum=0,
maximum=3,
value=1
)
focus = gr.Slider(
label="Focus",
interactive=True,
minimum=-0.03,
maximum=0.03,
value=0
)
zoom = gr.Slider(
label="Zoom",
interactive=True,
minimum=0,
maximum=10,
value=1
)
pos_x = gr.Slider(
label="Position X",
interactive=True,
minimum=-1,
maximum=1,
value=0
)
pos_y = gr.Slider(
label="Position Y",
interactive=True,
minimum=-1,
maximum=1,
value=0
)
reset = gr.Button(value="Reset All Parameters")
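
    # Wire up events: selecting or refreshing the gallery casts to the Looking
    # Glass display, and each slider streams its new value to the live hologram.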
gallery.select(fn=None, js="castHologram", inputs=gallery)
gallery.change(fn=zip_gallery, inputs=gallery, outputs=download_btn).then(fn=None, js="castHologram", inputs=gallery)
submit.click(
on_submit,
inputs=[input_image, batch_images, book, model_size, upscale_model, upscale_method, denoise_level, discard_alpha],
outputs=[gallery]
).success(fn=zip_gallery, inputs=gallery, outputs=download_btn).then(fn=None, js="castHologram", inputs=gallery)
    depthiness.change(fn=None, inputs=depthiness, js="(value) => updateHologram(value, 'depthiness')")
    focus.change(fn=None, inputs=focus, js="(value) => updateHologram(value, 'focus')")
    zoom.change(fn=None, inputs=zoom, js="(value) => updateHologram(value, 'zoom')")
    pos_x.change(fn=None, inputs=pos_x, js="(value) => updateHologram(value, 'crop_pos_x')")
    pos_y.change(fn=None, inputs=pos_y, js="(value) => updateHologram(value, 'crop_pos_y')")
reset.click(fn=None, js="""
() => {
document.querySelectorAll('button.reset-button').forEach(b => b.click());
}
""")
def on_submit_example(image):
return on_submit(image, None, None, 'vitl', None, None, -1, True)
example_files = glob.glob('assets/examples/*')
examples = gr.Examples(examples=example_files, inputs=[input_image], outputs=[gallery], fn=on_submit_example)
examples.load_input_event.success(fn=None, js="castHologram", inputs=gallery)
if __name__ == '__main__':
demo.queue().launch()