Spaces:

Ravenok
/

depth-anything-v2

Running on Zero

File size: 5,917 Bytes

import gradio as gr
import cv2
import matplotlib
import numpy as np
import os
import PIL
from PIL import Image
import spaces
import torch
import torch.nn.functional as F
from torchvision.transforms.functional import normalize
import tempfile
from gradio_imageslider import ImageSlider
from huggingface_hub import hf_hub_download
from briarmbg import BriaRMBG

from depth_anything_v2.dpt import DepthAnythingV2


# Background-removal network (briaai/RMBG-1.4) kept on the CPU; this is the
# instance process_background_cpu passes to _run_rmbg_on_image.
net_cpu = BriaRMBG.from_pretrained("briaai/RMBG-1.4")
net_cpu.to('cpu')
net_cpu.eval()

# Second instance of the same checkpoint placed on CUDA. It stays None when
# torch reports no CUDA device, in which case process_background_gpu raises.
net_gpu = None
if torch.cuda.is_available():
    net_gpu = BriaRMBG.from_pretrained("briaai/RMBG-1.4")
    net_gpu.to('cuda')
    net_gpu.eval()

def resize_image(image):
    """Return ``image`` converted to RGB and stretched to 1024x1024.

    The aspect ratio is not preserved; bilinear resampling is used.
    """
    target_size = (1024, 1024)
    rgb_image = image.convert('RGB')
    return rgb_image.resize(target_size, Image.BILINEAR)

def _run_rmbg_on_image(image_np, net, device_str):
    """Shared helper: run RMBG net on a numpy image and return a PIL image with alpha mask.

    Args:
        image_np: Image as a numpy array accepted by ``Image.fromarray``.
        net: Segmentation network. It is called as ``net(tensor)`` and its
            ``[0][0]`` entry is used as the predicted mask.
        device_str: ``'cuda'`` moves the input tensor to the GPU; any other
            value leaves the tensor where it was created (CPU).

    Returns:
        A copy of the input image whose alpha channel is the predicted mask,
        rescaled to the input's original width and height.
    """
    orig_image = Image.fromarray(image_np)
    w, h = orig_image.size

    # Build a 1x3x1024x1024 float tensor in [0, 1], then shift it by 0.5.
    im_np = np.array(resize_image(orig_image))
    im_tensor = torch.tensor(im_np, dtype=torch.float32).permute(2, 0, 1).unsqueeze(0) / 255.0
    im_tensor = normalize(im_tensor, [0.5, 0.5, 0.5], [1.0, 1.0, 1.0])
    if device_str == 'cuda':
        im_tensor = im_tensor.cuda()

    with torch.no_grad():
        result = net(im_tensor)

    # Resize the mask back to the original resolution and drop the batch axis.
    result = torch.squeeze(F.interpolate(result[0][0], size=(h, w), mode='bilinear'), 0)

    # Min-max normalise to [0, 1]; the epsilon guards against a constant mask.
    ma = torch.max(result)
    mi = torch.min(result)
    result = (result - mi) / (ma - mi + 1e-8)
    result_array = (result * 255).cpu().numpy().astype(np.uint8)

    # Remove only the leading channel axis. An unqualified squeeze would also
    # collapse a height or width of 1 and yield a mask of the wrong shape.
    pil_mask = Image.fromarray(np.squeeze(result_array, axis=0))

    new_im = orig_image.copy()
    new_im.putalpha(pil_mask)
    return new_im

@spaces.GPU(duration=6)
def process_background_gpu(image):
    """Remove the background of ``image`` using the CUDA copy of the network.

    Raises:
        RuntimeError: If no GPU network instance was created at import time.
    """
    if net_gpu is not None:
        return _run_rmbg_on_image(image, net_gpu, 'cuda')
    raise RuntimeError("No GPU instance available")

def process_background_cpu(image):
    """Remove the background of ``image`` using the CPU copy of the network."""
    return _run_rmbg_on_image(image_np=image, net=net_cpu, device_str='cpu')

# wrapper used by the UI: try GPU first, fall back to CPU on any exception
def process_background(image):
    """Remove the background from ``image``, preferring the GPU path.

    The GPU call is attempted first. If it raises for any reason, the failure
    is logged with its traceback and the CPU path is used instead, so the
    fallback remains best-effort but is no longer silent.

    Args:
        image: Numpy image forwarded unchanged to the GPU/CPU helpers.

    Returns:
        Whatever the selected helper returns (the image with an alpha mask).
    """
    # Function-scope import so this block does not depend on a file-level one.
    import logging

    try:
        # attempt GPU call (this can raise if Zero-GPU is unavailable)
        return process_background_gpu(image)
    except Exception:
        logging.getLogger(__name__).warning(
            "GPU background removal failed; falling back to CPU",
            exc_info=True,
        )
    # Run the fallback outside the handler so a CPU failure is reported on
    # its own rather than chained onto the GPU exception.
    return process_background_cpu(image)


# Custom CSS passed to gr.Blocks; the selectors match the elem_id values
# given to the image, slider and file components in the UI below.
css = """
#img-display-container {
    max-height: 100vh;
}
#img-display-input {
    max-height: 80vh;
}
#img-display-output {
    max-height: 80vh;
}
#download {
    height: 62px;
}
"""
# Device the depth model is moved to once its weights are loaded.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
# Constructor keyword arguments for DepthAnythingV2, keyed by encoder name.
model_configs = {
    'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
    'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
    'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]}
}
# Maps the encoder key to the size suffix used in the Hub repository name.
encoder2name = {
    'vits': 'Small',
    'vitb': 'Base',
    'vitl': 'Large'
}
# Encoder variant used by this app; selects both the config and the checkpoint.
encoder = 'vitb'
model_name = encoder2name[encoder]
model = DepthAnythingV2(**model_configs[encoder])
# Download (or reuse a cached copy of) the checkpoint from the Hugging Face Hub.
filepath = hf_hub_download(repo_id=f"depth-anything/Depth-Anything-V2-{model_name}", filename=f"depth_anything_v2_{encoder}.pth", repo_type="model")
# Weights are deserialised on the CPU first and moved to DEVICE afterwards.
# NOTE(review): torch.load unpickles a downloaded file; consider passing
# weights_only=True if the installed torch version supports it — confirm.
state_dict = torch.load(filepath, map_location="cpu")
model.load_state_dict(state_dict)
model = model.to(DEVICE).eval()

# Markdown strings rendered at the top of the UI (heading and description).
title = "# Chub Image Stuff"
description = """This is an endpoint for some image operations for a Chub.ai stage. It was just a copy of [Depth Anything V2](https://depth-anything-v2.github.io),
but now also includes [BRIA](https://huggingface.co/briaai/RMBG-1.4) for background removal."""

@spaces.GPU(duration=6)
def predict_depth(image):
    """Run the depth model's ``infer_image`` on ``image`` and return its result."""
    depth_map = model.infer_image(image)
    return depth_map

# Build the Gradio UI; the components are created in display order.
with gr.Blocks(css=css) as demo:
    gr.Markdown(title)
    gr.Markdown(description)
    gr.Markdown("### Image Processing Stuff")

    # Input image (delivered to callbacks as a numpy array) next to a
    # before/after slider that shows the result of either operation.
    with gr.Row():
        input_image = gr.Image(label="Input Image", type='numpy', elem_id='img-display-input')
        depth_image_slider = ImageSlider(label="Slider View", elem_id='img-display-output', position=0.5)
    depth_submit = gr.Button(value="Compute Depth")
    remove_background_submit = gr.Button(value="Remove Background")
    # Download slots; both operations write into these two file components.
    # Note that both components are given the same elem_id ("download").
    gray_depth_file = gr.File(label="Grayscale depth map", elem_id="download",)
    raw_file = gr.File(label="16-bit raw output (can be considered as disparity)", elem_id="download",)

    # Colormap used by on_submit to colourise the normalised depth map.
    cmap = matplotlib.colormaps.get_cmap('Spectral_r')

    def remove_background(image):
        """Remove the background of ``image`` and save the result as a PNG.

        Args:
            image: Input image as a numpy array (from the ``gr.Image`` input).

        Returns:
            A list ``[(original, result), png_path, png_path]``: the image pair
            for the slider, and the same temporary PNG path for both file
            outputs.
        """
        original_image = image.copy()

        result_image = process_background(image)
        # Write through the handle and let the context manager close it, so
        # the descriptor is not leaked on every request. delete=False keeps
        # the file on disk for Gradio to serve after the handle is closed.
        with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp_file:
            result_image.save(tmp_file, format='PNG')
        return [(original_image, result_image), tmp_file.name, tmp_file.name]

    def on_submit(image):
        """Compute a depth map for ``image`` and export it as PNG files.

        Args:
            image: Input image as a numpy array (from the ``gr.Image`` input).

        Returns:
            A list ``[(original, colored_depth), gray_png_path, raw_png_path]``:
            the image pair for the slider, the path of the 8-bit grayscale
            depth PNG, and the path of the 16-bit raw depth PNG.
        """
        original_image = image.copy()

        # The channel axis is reversed before inference (presumably RGB -> BGR
        # because infer_image expects OpenCV-style input — confirm).
        depth = predict_depth(image[:, :, ::-1])

        # Save the un-normalised prediction as 16-bit PNG. The handle is closed
        # by the context manager; delete=False keeps the file for download.
        with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp_raw_depth:
            Image.fromarray(depth.astype('uint16')).save(tmp_raw_depth, format='PNG')

        # Min-max normalise to 0..255. A constant depth map has zero range,
        # which would divide by zero and produce NaNs; map it to all zeros.
        depth_min = depth.min()
        depth_range = depth.max() - depth_min
        if depth_range > 0:
            depth = (depth - depth_min) / depth_range * 255.0
        else:
            depth = np.zeros_like(depth)
        depth = depth.astype(np.uint8)
        # Drop the colormap's alpha channel and convert to 8-bit RGB.
        colored_depth = (cmap(depth)[:, :, :3] * 255).astype(np.uint8)

        with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp_gray_depth:
            Image.fromarray(depth).save(tmp_gray_depth, format='PNG')

        return [(original_image, colored_depth), tmp_gray_depth.name, tmp_raw_depth.name]

    # Wire both buttons to the same input and the same three outputs; each
    # handler is also exposed under its own API endpoint name.
    depth_submit.click(
        fn=on_submit,
        inputs=[input_image],
        outputs=[depth_image_slider, gray_depth_file, raw_file],
        api_name="predict_depth",
    )
    remove_background_submit.click(
        fn=remove_background,
        inputs=[input_image],
        outputs=[depth_image_slider, gray_depth_file, raw_file],
        api_name="remove_background",
    )

if __name__ == '__main__':
    # Enable request queuing, then launch with a public share link requested.
    queued_demo = demo.queue()
    queued_demo.launch(share=True)