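"""Gradio demo for ApDepth monocular depth estimation."""
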
import os

import gradio as gr
import torch
from PIL import Image
from diffusers import AutoencoderKL
from transformers import CLIPTextModel, CLIPTokenizer

from apdepth.marigold_pipeline import MarigoldPipeline
from apdepth.modules.unet_2d_condition import UNet2DConditionModel

def load_example(example_images):
    return example_images


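# Runtime configuration: compute device, model repository, and precision.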
device = "cuda" if torch.cuda.is_available() else "cpu"
model_repo_id = "developy/ApDepth"
torch_dtype = torch.float32

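# Load the pretrained ApDepth components from the Hub and assemble the pipeline.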
vae = AutoencoderKL.from_pretrained(model_repo_id, subfolder="vae", torch_dtype=torch_dtype, allow_pickle=False)
unet = UNet2DConditionModel.from_pretrained(model_repo_id, subfolder="unet", torch_dtype=torch_dtype, allow_pickle=False)
text_encoder = CLIPTextModel.from_pretrained(model_repo_id, subfolder="text_encoder", torch_dtype=torch_dtype)
tokenizer = CLIPTokenizer.from_pretrained(model_repo_id, subfolder="tokenizer")
pipe = MarigoldPipeline(vae=vae, unet=unet, text_encoder=text_encoder, tokenizer=tokenizer)

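# Use xFormers memory-efficient attention when it is installed; otherwise keep the default attention.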
try:
    pipe.enable_xformers_memory_efficient_attention()
except ImportError:
    pass

pipe = pipe.to(device)


def infer(
    input_image,
    progress=gr.Progress(track_tqdm=True),
):
    """Run the depth-estimation pipeline on a single image and return the colorized depth map."""
    pipe_out = pipe(
        input_image,
        processing_res=768,
        match_input_res=True,
        batch_size=1,
        color_map="Spectral",
        show_progress_bar=True,
        resample_method="bilinear",
    )

    depth_colored: Image.Image = pipe_out.depth_colored

    return depth_colored


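# Paths to the bundled example images.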
example_images = [
    "example/00.jpg",
    "example/01.jpg",
    "example/02.jpg",
    "example/03.jpg",
    "example/04.jpg",
    "example/05.jpg",
    "example/06.jpg",
    "example/07.jpg",
    "example/08.jpg",
]

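# Constrain the displayed images so large inputs stay within the viewport.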
css = """
#img-display-container {
    max-height: 100vh;
}
#img-display-input {
    max-height: 80vh;
}
#img-display-output {
    max-height: 80vh;
}
#download {
    height: 62px;
}
"""

title = "# ApDepth"
description = """**Official demo for ApDepth** (the model hosted here was trained using Depth Anything v2-base, since Hugging Face Space storage is limited to 1 GB).
Please refer to our [website](https://haruko386.github.io/research/) for more details."""

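# Assemble the Gradio interface: input/output image panels, a compute button, and example images.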
with gr.Blocks(css=css) as demo:
    gr.Markdown(title)
    gr.Markdown(description)
    gr.Markdown("### Depth Estimation with ApDepth.")

    with gr.Row():
        with gr.Column():
            input_image = gr.Image(label="Input Image", type="pil", elem_id="img-display-input")
        with gr.Column():
            depth_map = gr.Image(label="Depth Image", type="pil", interactive=False, elem_id="img-display-output")

    compute_button = gr.Button(value="Compute Depth")

    compute_button.click(
        fn=infer,
        inputs=[input_image],
        outputs=[depth_map],
    )

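    # Populate the Examples gallery from the bundled example/ directory.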
    example_files = os.listdir('example')
    example_files.sort()
    example_files = [os.path.join('example', filename) for filename in example_files]
    examples = gr.Examples(examples=example_files, inputs=[input_image], outputs=[depth_map], fn=infer)

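
# Enable the request queue so inference jobs run in order, then launch with a public share link.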
demo.queue().launch(share=True)