Spaces:

Ravenok
/

depth-anything-v2

Running on Zero

Lord-Raven

Trying to add CPU support.

8adef27 3 months ago

5.92 kB

	import gradio as gr
	import cv2
	import matplotlib
	import numpy as np
	import os
	import PIL
	from PIL import Image
	import spaces
	import torch
	import torch.nn.functional as F
	from torchvision.transforms.functional import normalize
	import tempfile
	from gradio_imageslider import ImageSlider
	from huggingface_hub import hf_hub_download
	from briarmbg import BriaRMBG

	from depth_anything_v2.dpt import DepthAnythingV2


	# Background-removal network (RMBG-1.4) kept on the CPU. It is always loaded,
	# so a CPU path exists whether or not CUDA is reported.
	net_cpu = BriaRMBG.from_pretrained("briaai/RMBG-1.4")
	net_cpu.to('cpu')
	net_cpu.eval()

	# A second copy is placed on CUDA only when torch reports it as available;
	# otherwise net_gpu stays None and callers must use the CPU network.
	if torch.cuda.is_available():
	    net_gpu = BriaRMBG.from_pretrained("briaai/RMBG-1.4")
	    net_gpu.to('cuda')
	    net_gpu.eval()
	else:
	    net_gpu = None

	def resize_image(image):
	    """Return *image* converted to RGB and resized to 1024x1024 (bilinear)."""
	    target_size = (1024, 1024)
	    rgb_image = image.convert('RGB')
	    return rgb_image.resize(target_size, Image.BILINEAR)

	def _run_rmbg_on_image(image_np, net, device_str):
	    """Run an RMBG network on a numpy image and return it with an alpha mask.

	    Args:
	        image_np: Image as a numpy array accepted by ``Image.fromarray``.
	        net: Network to call; ``net(x)[0][0]`` is taken as the mask prediction.
	        device_str: Torch device string the input tensor is moved to. It
	            should name the device ``net`` lives on (e.g. ``'cpu'``,
	            ``'cuda'``).

	    Returns:
	        A PIL image: a copy of the input whose alpha channel is the
	        min-max-normalised mask, resized back to the input resolution.
	    """
	    orig_image = Image.fromarray(image_np)
	    w, h = orig_image.size

	    # Build a 1x3x1024x1024 float tensor in [0, 1], then shift by the 0.5 mean.
	    im_np = np.array(resize_image(orig_image))
	    im_tensor = torch.tensor(im_np, dtype=torch.float32).permute(2, 0, 1).unsqueeze(0) / 255.0
	    im_tensor = normalize(im_tensor, [0.5, 0.5, 0.5], [1.0, 1.0, 1.0])

	    # Move the input to the requested device for every device string, not just
	    # the literal 'cuda' (a no-op for 'cpu'; also covers 'cuda:0', 'mps', ...).
	    im_tensor = im_tensor.to(device_str)

	    with torch.no_grad():
	        result = net(im_tensor)

	    # Resize the prediction back to the original (h, w) and drop the batch axis.
	    result = torch.squeeze(F.interpolate(result[0][0], size=(h, w), mode='bilinear'), 0)

	    # Min-max normalise to [0, 1]; the epsilon guards against a constant mask.
	    ma = torch.max(result)
	    mi = torch.min(result)
	    result = (result - mi) / (ma - mi + 1e-8)

	    result_array = (result * 255).cpu().numpy().astype(np.uint8)
	    pil_mask = Image.fromarray(np.squeeze(result_array))

	    new_im = orig_image.copy()
	    new_im.putalpha(pil_mask)
	    return new_im

	@spaces.GPU(duration=6)
	def process_background_gpu(image):
	    """Remove the background of *image* using the CUDA copy of the RMBG network.

	    Raises:
	        RuntimeError: If no GPU network was loaded (``net_gpu`` is None).
	    """
	    if net_gpu is not None:
	        return _run_rmbg_on_image(image, net_gpu, 'cuda')
	    raise RuntimeError("No GPU instance available")

	def process_background_cpu(image):
	    """Remove the background of *image* using the CPU copy of the RMBG network."""
	    cpu_result = _run_rmbg_on_image(image, net_cpu, 'cpu')
	    return cpu_result

	# Wrapper used by the UI: try the GPU first, fall back to the CPU on any failure.
	def process_background(image):
	    """Remove the background of *image*, preferring the GPU path.

	    The GPU path is attempted only when a GPU network was loaded. Any
	    exception it raises is logged and the CPU path is used instead, so the
	    caller always gets a result unless the CPU path itself fails.

	    Args:
	        image: Numpy image forwarded unchanged to the GPU/CPU helpers.

	    Returns:
	        Whatever ``process_background_gpu`` / ``process_background_cpu`` return.
	    """
	    import logging  # local import: keeps this block self-contained

	    # Without a GPU network the GPU call is guaranteed to raise, so skip it
	    # rather than requesting a GPU slot only to fail.
	    if net_gpu is not None:
	        try:
	            return process_background_gpu(image)
	        except Exception:
	            # Deliberately broad: any GPU-side failure should degrade to the
	            # CPU path, but it must be visible in the logs.
	            logging.getLogger(__name__).exception(
	                "GPU background removal failed; falling back to CPU"
	            )
	    return process_background_cpu(image)


	# Custom stylesheet passed to gr.Blocks(css=css). #img-display-input,
	# #img-display-output and #download match elem_id values used by the UI
	# components in this file; #img-display-container is not referenced in the
	# visible code.
	css = """
	#img-display-container {
	max-height: 100vh;
	}
	#img-display-input {
	max-height: 80vh;
	}
	#img-display-output {
	max-height: 80vh;
	}
	#download {
	height: 62px;
	}
	"""
	# Device the depth model is placed on, chosen once at import time.
	DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

	# Constructor keyword arguments for each DepthAnythingV2 encoder variant.
	model_configs = {
	    'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
	    'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
	    'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]}
	}

	# Encoder key -> size name used in the Hugging Face repo id.
	encoder2name = {
	    'vits': 'Small',
	    'vitb': 'Base',
	    'vitl': 'Large'
	}

	encoder = 'vitb'
	model_name = encoder2name[encoder]
	model = DepthAnythingV2(**model_configs[encoder])
	filepath = hf_hub_download(repo_id=f"depth-anything/Depth-Anything-V2-{model_name}", filename=f"depth_anything_v2_{encoder}.pth", repo_type="model")

	# weights_only=True restricts unpickling to tensors and plain containers, so
	# a tampered checkpoint cannot execute arbitrary code while being loaded.
	state_dict = torch.load(filepath, map_location="cpu", weights_only=True)
	model.load_state_dict(state_dict)
	model = model.to(DEVICE).eval()

	# Markdown strings rendered at the top of the UI via gr.Markdown.
	title = "# Chub Image Stuff"
	description = """This is an endpoint for some image operations for a Chub.ai stage. It was just a copy of [Depth Anything V2](https://depth-anything-v2.github.io),
	but now also includes [BRIA](https://huggingface.co/briaai/RMBG-1.4) for background removal."""

	@spaces.GPU(duration=6)
	def predict_depth(image):
	    """Run the depth model's ``infer_image`` on *image* and return its result."""
	    depth = model.infer_image(image)
	    return depth

	with gr.Blocks(css=css) as demo:
	    gr.Markdown(title)
	    gr.Markdown(description)
	    gr.Markdown("### Image Processing Stuff")

	    # NOTE(review): nesting was reconstructed from a flattened source; only the
	    # input image and the slider are assumed to share the row - confirm.
	    with gr.Row():
	        input_image = gr.Image(label="Input Image", type='numpy', elem_id='img-display-input')
	        depth_image_slider = ImageSlider(label="Slider View", elem_id='img-display-output', position=0.5)
	    depth_submit = gr.Button(value="Compute Depth")
	    remove_background_submit = gr.Button(value="Remove Background")
	    gray_depth_file = gr.File(label="Grayscale depth map", elem_id="download",)
	    raw_file = gr.File(label="16-bit raw output (can be considered as disparity)", elem_id="download",)

	    cmap = matplotlib.colormaps.get_cmap('Spectral_r')

	    def _save_temp_png(pil_image):
	        """Save *pil_image* to a new temporary .png file and return its path."""
	        # Close the handle before writing so no file descriptor is leaked; the
	        # file itself is kept (delete=False) for Gradio to serve afterwards.
	        with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
	            path = tmp.name
	        pil_image.save(path)
	        return path

	    def remove_background(image):
	        """Remove the background of *image*.

	        Returns:
	            ``[(original, result), path, path]`` - the slider pair and the
	            same PNG path for both file outputs.
	        """
	        if image is None:
	            raise gr.Error("Please provide an input image.")
	        original_image = image.copy()

	        result_image = process_background(image)
	        png_path = _save_temp_png(result_image)
	        return [(original_image, result_image), png_path, png_path]

	    def on_submit(image):
	        """Compute the depth map of *image*.

	        Returns:
	            ``[(original, coloured depth), gray PNG path, raw 16-bit PNG path]``.
	        """
	        if image is None:
	            raise gr.Error("Please provide an input image.")
	        original_image = image.copy()

	        # Reverse the channel order (RGB from Gradio -> BGR) before inference.
	        depth = predict_depth(image[:, :, ::-1])

	        raw_path = _save_temp_png(Image.fromarray(depth.astype('uint16')))

	        # Scale to 0..255; a constant depth map has zero range, so emit zeros
	        # instead of dividing by zero.
	        depth_min = depth.min()
	        depth_range = depth.max() - depth_min
	        if depth_range > 0:
	            depth = (depth - depth_min) / depth_range * 255.0
	        else:
	            depth = np.zeros_like(depth)
	        depth = depth.astype(np.uint8)
	        colored_depth = (cmap(depth)[:, :, :3] * 255).astype(np.uint8)

	        gray_path = _save_temp_png(Image.fromarray(depth))

	        return [(original_image, colored_depth), gray_path, raw_path]

	    depth_submit.click(on_submit, inputs=[input_image], outputs=[depth_image_slider, gray_depth_file, raw_file], api_name="predict_depth")
	    remove_background_submit.click(remove_background, inputs=[input_image], outputs=[depth_image_slider, gray_depth_file, raw_file], api_name="remove_background")

	if __name__ == '__main__':
	    # Enable request queuing, then launch with a share link requested.
	    queued_demo = demo.queue()
	    queued_demo.launch(share=True)