File size: 5,917 Bytes
861d0c3
 
 
 
 
5a853b5
861d0c3
 
 
40c5bb2
 
861d0c3
 
 
5a853b5
861d0c3
 
 
5a853b5
8adef27
 
 
 
 
 
 
 
 
5a853b5
 
 
 
 
 
 
8adef27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5a853b5
 
 
 
 
8adef27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5a853b5
861d0c3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
04f866e
861d0c3
 
 
 
04f866e
861d0c3
 
 
 
 
 
 
 
 
3f1abeb
 
 
861d0c3
04f866e
861d0c3
 
 
 
 
 
9308bbb
861d0c3
 
 
3f1abeb
b25814e
 
861d0c3
 
 
 
 
5a853b5
501f111
 
43d8e3b
 
 
501f111
5a853b5
861d0c3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b25814e
501f111
861d0c3
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
import gradio as gr
import cv2
import matplotlib
import numpy as np
import os
import PIL
from PIL import Image
import spaces
import torch
import torch.nn.functional as F
from torchvision.transforms.functional import normalize
import tempfile
from gradio_imageslider import ImageSlider
from huggingface_hub import hf_hub_download
from briarmbg import BriaRMBG

from depth_anything_v2.dpt import DepthAnythingV2


# RMBG-1.4 background-removal network kept on the CPU. It is always loaded,
# so process_background_cpu has a model to use even without CUDA.
net_cpu = BriaRMBG.from_pretrained("briaai/RMBG-1.4")
net_cpu.to('cpu')
net_cpu.eval()

# Second copy of the same network placed on CUDA. It is only created when
# torch reports a CUDA device; otherwise net_gpu stays None and
# process_background_gpu raises RuntimeError.
net_gpu = None
if torch.cuda.is_available():
    net_gpu = BriaRMBG.from_pretrained("briaai/RMBG-1.4")
    net_gpu.to('cuda')
    net_gpu.eval()

def resize_image(image):
    """Return *image* converted to RGB and resized to 1024x1024 (bilinear).

    The target size is fixed, so the aspect ratio of the input is not preserved.
    """
    target_size = (1024, 1024)
    rgb_image = image.convert('RGB')
    return rgb_image.resize(target_size, Image.BILINEAR)

def _run_rmbg_on_image(image_np, net, device_str):
    """Run the RMBG net on a numpy image and return a PIL image with an alpha mask.

    Args:
        image_np: Image as a numpy array accepted by ``Image.fromarray``.
        net: Callable model; its output is indexed as ``output[0][0]``.
        device_str: When equal to ``'cuda'`` the input tensor is moved to the
            GPU; any other value leaves the tensor where it was created.

    Returns:
        A copy of the input image whose alpha channel is the predicted mask,
        min-max scaled to the 0..255 range.
    """
    source = Image.fromarray(image_np)
    width, height = source.size

    # Preprocess: 1024x1024 RGB -> float tensor with a leading batch axis,
    # scaled to [0, 1] and then shifted by 0.5 per channel (std of 1.0).
    pixels = np.array(resize_image(source))
    batch = torch.tensor(pixels, dtype=torch.float32).permute(2, 0, 1).unsqueeze(0) / 255.0
    batch = normalize(batch, [0.5, 0.5, 0.5], [1.0, 1.0, 1.0])
    if device_str == 'cuda':
        batch = batch.cuda()

    with torch.no_grad():
        outputs = net(batch)

    # Bring the prediction back to the source resolution, then min-max scale.
    # The small epsilon keeps the division defined for a constant prediction.
    mask = F.interpolate(outputs[0][0], size=(height, width), mode='bilinear')
    mask = torch.squeeze(mask, 0)
    highest = torch.max(mask)
    lowest = torch.min(mask)
    mask = (mask - lowest) / (highest - lowest + 1e-8)

    mask_u8 = (mask * 255).cpu().numpy().astype(np.uint8)
    alpha = Image.fromarray(np.squeeze(mask_u8))

    cutout = source.copy()
    cutout.putalpha(alpha)
    return cutout

@spaces.GPU(duration=6)
def process_background_gpu(image):
    """Remove the background of *image* using the CUDA copy of the RMBG model.

    Raises:
        RuntimeError: If ``net_gpu`` is ``None`` (no CUDA model was loaded).
    """
    if net_gpu is not None:
        return _run_rmbg_on_image(image, net_gpu, 'cuda')
    raise RuntimeError("No GPU instance available")

def process_background_cpu(image):
    """Remove the background of *image* using the CPU copy of the RMBG model."""
    cutout = _run_rmbg_on_image(image, net_cpu, 'cpu')
    return cutout

# Wrapper used by the UI: try the GPU path first, fall back to CPU on any exception.
def process_background(image):
    """Remove the background of *image*, preferring the GPU implementation.

    The GPU path is attempted first. If it raises for any reason (for
    example when no GPU model is loaded), the failure is logged with its
    traceback and the CPU implementation is used instead, so the caller
    still gets a result.

    Args:
        image: Input image, forwarded unchanged to the GPU/CPU helpers.

    Returns:
        Whatever ``process_background_gpu`` / ``process_background_cpu`` return.

    Raises:
        Exception: Only if the CPU fallback itself fails.
    """
    import logging  # local import keeps this block self-contained

    try:
        return process_background_gpu(image)
    except Exception:
        # Deliberate best-effort fallback, but record why the GPU path failed
        # instead of hiding it, so real bugs are not masked by the CPU path.
        logging.getLogger(__name__).warning(
            "GPU background removal failed; falling back to CPU", exc_info=True
        )
        return process_background_cpu(image)


# Custom CSS handed to gr.Blocks(css=...). The selectors target element ids:
# "img-display-input" / "img-display-output" are set on the image widgets and
# "download" on the file widgets; "img-display-container" is not assigned to
# any component in this file.
css = """
#img-display-container {
    max-height: 100vh;
}
#img-display-input {
    max-height: 80vh;
}
#img-display-output {
    max-height: 80vh;
}
#download {
    height: 62px;
}
"""
# Device the depth model is placed on: CUDA when torch reports it, else CPU.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
# Constructor keyword arguments for DepthAnythingV2, keyed by encoder name.
model_configs = {
    'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
    'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
    'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]}
}
# Maps an encoder name to the size suffix used in the Hugging Face repo id.
encoder2name = {
    'vits': 'Small',
    'vitb': 'Base',
    'vitl': 'Large'
}
# Encoder variant used by this app; selects both the config and the checkpoint.
encoder = 'vitb'
model_name = encoder2name[encoder]
model = DepthAnythingV2(**model_configs[encoder])
# Download (or reuse the cached copy of) the checkpoint from the Hugging Face Hub.
filepath = hf_hub_download(repo_id=f"depth-anything/Depth-Anything-V2-{model_name}", filename=f"depth_anything_v2_{encoder}.pth", repo_type="model")
# Weights are loaded onto the CPU first and moved to DEVICE afterwards.
# NOTE(review): torch.load unpickles the file; consider weights_only=True if
# the installed torch version supports it - confirm before changing.
state_dict = torch.load(filepath, map_location="cpu")
model.load_state_dict(state_dict)
model = model.to(DEVICE).eval()

# Markdown snippets rendered at the top of the page via gr.Markdown.
title = "# Chub Image Stuff"
description = """This is an endpoint for some image operations for a Chub.ai stage. It was just a copy of [Depth Anything V2](https://depth-anything-v2.github.io),
but now also includes [BRIA](https://huggingface.co/briaai/RMBG-1.4) for background removal."""

@spaces.GPU(duration=6)
def predict_depth(image):
    """Run the module-level depth model on *image* and return its prediction."""
    depth = model.infer_image(image)
    return depth

# UI definition: one input image, a before/after slider, two action buttons and
# two downloadable files. Both buttons write to the same three outputs.
with gr.Blocks(css=css) as demo:
    gr.Markdown(title)
    gr.Markdown(description)
    gr.Markdown("### Image Processing Stuff")

    with gr.Row():
        input_image = gr.Image(label="Input Image", type='numpy', elem_id='img-display-input')
        depth_image_slider = ImageSlider(label="Slider View", elem_id='img-display-output', position=0.5)
    depth_submit = gr.Button(value="Compute Depth")
    remove_background_submit = gr.Button(value="Remove Background")
    # NOTE(review): both File components share elem_id "download"; kept as-is
    # because the CSS rule targets that id.
    gray_depth_file = gr.File(label="Grayscale depth map", elem_id="download",)
    raw_file = gr.File(label="16-bit raw output (can be considered as disparity)", elem_id="download",)

    cmap = matplotlib.colormaps.get_cmap('Spectral_r')

    def _require_image(image):
        """Raise a user-visible error when no input image was provided."""
        if image is None:
            raise gr.Error("Please upload an image first.")

    def _save_temp_png(pil_image):
        """Save *pil_image* to a new temporary ``.png`` file and return its path.

        The file is created with ``delete=False`` so it survives this call and
        can be returned to Gradio. The handle is closed before Pillow writes
        to the path, so no file descriptor is leaked per request.
        """
        with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
            path = tmp.name
        pil_image.save(path)
        return path

    def remove_background(image):
        """Remove the background of *image*.

        Returns:
            ``[(original, cutout), png_path, png_path]`` - the slider pair and
            the same PNG path for both file outputs.

        Raises:
            gr.Error: If *image* is ``None``.
        """
        _require_image(image)
        original_image = image.copy()

        result_image = process_background(image)
        png_path = _save_temp_png(result_image)
        return [(original_image, result_image), png_path, png_path]

    def on_submit(image):
        """Compute a depth map for *image*.

        Returns:
            ``[(original, colored_depth), gray_png_path, raw_png_path]`` where
            the raw file holds the prediction cast to uint16 and the gray file
            holds the prediction min-max scaled to 0..255.

        Raises:
            gr.Error: If *image* is ``None``.
        """
        _require_image(image)
        original_image = image.copy()

        # The channel axis is reversed before inference.
        # NOTE(review): presumably RGB -> BGR for the model - confirm.
        depth = predict_depth(image[:, :, ::-1])

        raw_path = _save_temp_png(Image.fromarray(depth.astype('uint16')))

        # Min-max scale to 0..255. A constant depth map has zero range, which
        # would divide by zero, so it is mapped to all zeros instead.
        depth_min = depth.min()
        depth_range = depth.max() - depth_min
        if depth_range > 0:
            depth = (depth - depth_min) / depth_range * 255.0
        else:
            depth = np.zeros_like(depth)
        depth = depth.astype(np.uint8)
        colored_depth = (cmap(depth)[:, :, :3] * 255).astype(np.uint8)

        gray_path = _save_temp_png(Image.fromarray(depth))

        return [(original_image, colored_depth), gray_path, raw_path]

    depth_submit.click(on_submit, inputs=[input_image], outputs=[depth_image_slider, gray_depth_file, raw_file], api_name="predict_depth")
    remove_background_submit.click(remove_background, inputs=[input_image], outputs=[depth_image_slider, gray_depth_file, raw_file], api_name="remove_background")

# Script entry point: enable the request queue and start the Gradio server
# with share=True when the file is run directly.
if __name__ == '__main__':
    demo.queue().launch(share=True)