"""Gradio demo that turns images into side-by-side RGBD images with Depth Anything V2.

Inputs can be a single image, a batch of image files, or a ZIP/PDF document.
Each input is optionally upscaled/denoised with waifu2x, then a depth map is
predicted and pasted to the right of the colour image.
"""
import glob
import tempfile
import uuid
from pathlib import Path
from zipfile import ZipFile, is_zipfile

import gradio as gr
import numpy as np
import spaces
import torch
from huggingface_hub import hf_hub_download
from PIL import Image, ImageEnhance, ImageOps, UnidentifiedImageError
from pypdf import PdfReader

from depth_anything_v2.dpt import DepthAnythingV2

css = """
#img-display-container {
    max-height: 100vh;
}
#img-display-input {
    max-height: 80vh;
}
#img-display-output {
    max-height: 80vh;
}
#download {
    height: 62px;
}
.thumbnail-item {
    aspect-ratio: var(--ratio-wide)
}
.thumbnail-item img {
    object-fit: contain
}
"""

# NOTE(review): the event handlers below reference the JavaScript functions
# `castHologram` and `updateHologram`, which are not defined anywhere in this
# file. Presumably they were meant to be injected through `head`, which is
# currently whitespace-only -- confirm against the deployed app.
head = """
"""

# Prefer CUDA, then Apple MPS, then fall back to CPU.
DEVICE = 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu'

# Constructor keyword arguments for DepthAnythingV2, keyed by encoder id.
model_configs = {
    'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
    'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
    'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
    'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]}
}

# Encoder id -> name fragment used in the Hugging Face repository id.
encoder2name = {
    'vits': 'Small',
    'vitb': 'Base',
    'vitl': 'Large',
    'vitg': 'Giant',  # we are undergoing company review procedures to release our giant model checkpoint
}

title = "# Depth Anything V2"
description = """Looking Glass demo for **Depth Anything V2**.
Please refer to our [paper](https://arxiv.org/abs/2406.09414), [project page](https://depth-anything-v2.github.io), or [github](https://github.com/DepthAnything/Depth-Anything-V2) for more details."""


@spaces.GPU
def predict_depth(image, model):
    """Return an RGBD image: *image* on the left, its depth map on the right.

    Args:
        image: PIL image to analyse.
        model: Object exposing ``infer_image(ndarray)``; it is fed the RGB
            pixels with the channel axis reversed.

    Returns:
        A new PIL image in ``image.mode`` that is twice as wide as *image*.
        The depth values are min-max normalised to 0..255 and stored as 8-bit
        grey levels.
    """
    w, h = image.size
    depth = model.infer_image(np.array(image.convert("RGB"))[:, :, ::-1])

    depth_min = depth.min()
    depth_range = depth.max() - depth_min
    if depth_range > 0:
        depth = (depth - depth_min) / depth_range * 255.0
    else:
        # A constant depth map has no range; dividing by it would yield NaN
        # and an undefined uint8 cast, so emit a flat map instead.
        depth = np.zeros_like(depth)
    gray_depth = Image.fromarray(depth.astype(np.uint8))

    rgbd = Image.new(image.mode, (w * 2, h))
    rgbd.paste(image, (0, 0))
    rgbd.paste(gray_depth, (w, 0))
    return rgbd


@spaces.GPU
def upscale_image(image, model, background, discard_alpha):
    """Normalise the colour mode of *image* and optionally run it through *model*.

    Args:
        image: PIL image to prepare.
        model: Object exposing ``infer(image)`` (the waifu2x model built in
            ``on_submit``), or ``None`` to skip upscaling/denoising.
        background: RGBA image composited underneath transparent inputs.
        discard_alpha: When true, RGBA inputs are flattened onto *background*
            and the result is returned as RGB.

    Returns:
        The processed PIL image; RGB when *discard_alpha* is true, otherwise
        whatever mode the processing left it in.
    """
    if image.mode == "RGBA":
        if discard_alpha:
            # Fit the background to the image size, then flatten the image onto it.
            backdrop = ImageOps.pad(background, image.size, color=(0, 0, 0))
            image = Image.alpha_composite(backdrop, image)
    elif image.mode != "RGB":
        image = image.convert("RGB")

    if model is not None:
        image = model.infer(image)

    return image.convert("RGB") if discard_alpha else image


@spaces.GPU
def on_submit(image, batch_images, book, encoder, upscale_model, upscale_method, denoise_level, discard_alpha, progress=gr.Progress()):
    """Compute RGBD images for every supplied input.

    Args:
        image: Single PIL image, or ``None``.
        batch_images: Iterable of image file paths, or ``None``.
        book: Path to a ZIP archive of images or to a PDF, or ``None``.
        encoder: Key into ``model_configs`` / ``encoder2name``.
        upscale_model: waifu2x ``model_type``; only used when
            *upscale_method* is not ``None``.
        upscale_method: waifu2x ``method``, or ``None`` to disable waifu2x.
        denoise_level: waifu2x ``noise_level``.
        discard_alpha: Flatten transparent images onto a radial gradient.
        progress: Gradio progress tracker used to wrap the input loops.

    Returns:
        List of ``(rgbd_image, caption)`` tuples; the caption is ``None`` for
        the single image and the source file/entry name otherwise.
    """
    model_name = encoder2name[encoder]
    model = DepthAnythingV2(**model_configs[encoder])
    filepath = hf_hub_download(
        repo_id=f"depth-anything/Depth-Anything-V2-{model_name}",
        filename=f"depth_anything_v2_{encoder}.pth",
        repo_type="model",
    )
    # NOTE(review): torch.load unpickles the checkpoint. Consider passing
    # weights_only=True if the minimum supported torch version allows it.
    state_dict = torch.load(filepath, map_location="cpu")
    model.load_state_dict(state_dict)
    model = model.to(DEVICE).eval()

    superresolution = None
    if upscale_method is not None:
        superresolution = torch.hub.load(
            "nagadomi/nunif:master",
            "waifu2x",
            model_type=upscale_model,
            method=upscale_method,
            noise_level=denoise_level,
            keep_alpha=not discard_alpha,
            trust_repo=True,
        ).to(DEVICE)

    # Inverted, brightened radial gradient used behind transparent images.
    gradient = ImageEnhance.Brightness(Image.radial_gradient("L"))
    background = ImageOps.invert(gradient.enhance(1.5)).convert("RGBA")

    result = []

    def add_result(img, caption):
        """Upscale *img*, predict its depth and record it under *caption*."""
        img = upscale_image(img, superresolution, background, discard_alpha)
        result.append((predict_depth(img, model), caption))

    if image is not None:
        add_result(image, None)

    if batch_images is not None:
        for path in progress.tqdm(batch_images):
            with Image.open(path) as img:
                add_result(img, Path(path).name)

    if book is not None:
        if is_zipfile(book):
            with ZipFile(book, "r") as zf:
                for entry in progress.tqdm(zf.infolist()):
                    # Directory entries carry no image data.
                    if entry.is_dir():
                        continue
                    with zf.open(entry) as file:
                        try:
                            img = Image.open(file)
                        except UnidentifiedImageError:
                            # Skip archive members that are not images
                            # instead of aborting the whole document.
                            continue
                        with img:
                            add_result(img, entry.filename)
        else:
            reader = PdfReader(book)
            for page in progress.tqdm(reader.pages):
                for image_file_object in page.images:
                    add_result(image_file_object.image, image_file_object.name)

    return result


def zip_gallery(gallery, progress=gr.Progress()):
    """Build the download target for the current gallery contents.

    Args:
        gallery: Sequence of ``(file_path, caption)`` items, or ``None``.
        progress: Gradio progress tracker used while writing the archive.

    Returns:
        ``None`` for an empty or missing gallery, the image path itself when
        there is exactly one item, otherwise the path of a freshly written
        ZIP archive in the system temp directory.
    """
    if not gallery:
        return None
    if len(gallery) == 1:
        return gallery[0][0]

    archive_path = (Path(tempfile.gettempdir()) / uuid.uuid4().hex).with_suffix(".zip")
    with ZipFile(archive_path, "w") as zf:
        # Wrap the list itself so the progress tracker can see its length.
        for index, item in enumerate(progress.tqdm(gallery)):
            image_path, caption = item[0], item[1]
            if not caption:
                fn = Path(image_path).name
            else:
                # Captions may contain directories (ZIP entry names); keep
                # only the base name so the archive stays flat.
                fn = Path(caption).with_suffix(".rgbd.png").name
            zf.write(image_path, "{:02d}_{}".format(index, fn))
    return archive_path


gr.set_static_paths(paths=[Path.cwd().absolute()/"assets"])

# NOTE(review): the nesting of the layout below was reconstructed from a
# whitespace-flattened source -- confirm against the intended UI.
with gr.Blocks(css=css, head=head) as demo:
    gr.Markdown(title)
    gr.Markdown(description)

    with gr.Row():
        with gr.Column():
            with gr.Tab("Single Image"):
                input_image = gr.Image(
                    label="Input Image",
                    elem_id='img-display-input',
                    type='pil',
                    image_mode=None
                )
            with gr.Tab("Batch Mode"):
                batch_images = gr.File(
                    label="Input Images",
                    file_types=["image"],
                    file_count="multiple"
                )
            with gr.Tab("Document Mode"):
                book = gr.File(
                    label="PDF/ZIP Document",
                    file_types=[".pdf", ".zip"],
                )
            with gr.Row():
                clear = gr.ClearButton(components=[input_image, batch_images, book])
                submit = gr.Button(value="Compute Depth", variant="primary")
            model_size = gr.Radio(
                label="Model Size",
                choices=[('Small', 'vits'), ('Base', 'vitb'), ('Large', 'vitl')],
                value="vitl"
            )
            upscale_method = gr.Radio(
                label="Upscale Method",
                choices=[("No Upscaling or Denoising", None), ("Denoise Only", "noise"), ("2x Upscaling", "scale2x"), ("4x Upscaling", "scale4x")]
            )
            upscale_model = gr.Dropdown(
                choices=["art", "art_scan", "photo", "swin_unet/art", "swin_unet/art_scan", "swin_unet/photo", "cunet/art", "upconv_7/art", "upconv_7/photo"],
                label="Upscaling Model",
                value="art"
            )
            denoise_level = gr.Slider(
                label="Denoise Level (-1 = None)",
                value=0,
                step=1,
                minimum=-1,
                maximum=4
            )
            discard_alpha = gr.Checkbox(label="Add radial gradient background to transparent images", value=True)
        with gr.Column():
            with gr.Tab("Result"):
                gallery = gr.Gallery(
                    label="RGBD Images",
                    elem_id='img-display-output',
                    format="png",
                    columns=4,
                    object_fit="contain",
                    preview=True,
                    interactive=True
                )
                download_btn = gr.DownloadButton()
            depthiness = gr.Slider(
                label="Depthiness",
                elem_id="depthiness",
                interactive=True,
                minimum=0,
                maximum=3,
                value=1
            )
            focus = gr.Slider(
                label="Focus",
                interactive=True,
                minimum=-0.03,
                maximum=0.03,
                value=0
            )
            zoom = gr.Slider(
                label="Zoom",
                interactive=True,
                minimum=0,
                maximum=10,
                value=1
            )
            pos_x = gr.Slider(
                label="Position X",
                interactive=True,
                minimum=-1,
                maximum=1,
                value=0
            )
            pos_y = gr.Slider(
                label="Position Y",
                interactive=True,
                minimum=-1,
                maximum=1,
                value=0
            )
            reset = gr.Button(value="Reset All Parameters")

    # Selecting or changing gallery content re-casts the hologram client-side.
    gallery.select(fn=None, js="castHologram", inputs=gallery)
    gallery.change(fn=zip_gallery, inputs=gallery, outputs=download_btn).then(fn=None, js="castHologram", inputs=gallery)
    submit.click(
        on_submit,
        inputs=[input_image, batch_images, book, model_size, upscale_model, upscale_method, denoise_level, discard_alpha],
        outputs=[gallery]
    ).success(fn=zip_gallery, inputs=gallery, outputs=download_btn).then(fn=None, js="castHologram", inputs=gallery)

    # Slider changes are forwarded straight to the client-side hologram.
    depthiness.change(fn=None, inputs=depthiness, js="(value) => updateHologram (value, 'depthiness')")
    focus.change(fn=None, inputs=focus, js="(value) => updateHologram (value, 'focus')")
    zoom.change(fn=None, inputs=zoom, js="(value) => updateHologram (value, 'zoom')")
    pos_x.change(fn=None, inputs=pos_x, js="(value) => updateHologram (value, 'crop_pos_x')")
    pos_y.change(fn=None, inputs=pos_y, js="(value) => updateHologram (value, 'crop_pos_y')")
    reset.click(fn=None, js="""
        () => {
            document.querySelectorAll('button.reset-button').forEach(b => b.click());
        }
    """)

    def on_submit_example(image):
        """Run ``on_submit`` for an example image with the 'vitl' encoder and no upscaling."""
        return on_submit(image, None, None, 'vitl', None, None, -1, True)

    example_files = glob.glob('assets/examples/*')
    examples = gr.Examples(examples=example_files, inputs=[input_image], outputs=[gallery], fn=on_submit_example)
    examples.load_input_event.success(fn=None, js="castHologram", inputs=gallery)


if __name__ == '__main__':
    demo.queue().launch()