saarstefan's picture
change model from small to base
fa2a45a verified
Raw
History Blame Contribute Delete
2.01 kB
import gradio as gr
import numpy as np
import torch
from PIL import Image
from transformers import AutoImageProcessor, AutoModelForDepthEstimation
# Load model and processor once at startup
MODEL_ID = "depth-anything/Depth-Anything-V2-Base-hf"
print(f"Loading model: {MODEL_ID}...")
image_processor = AutoImageProcessor.from_pretrained(MODEL_ID)
model = AutoModelForDepthEstimation.from_pretrained(MODEL_ID)
model.eval()
print("Model loaded.")
def estimate_depth(input_image):
"""
Takes a PIL image, runs depth estimation, returns the depth map as a grayscale PIL image.
"""
if input_image is None:
raise gr.Error("Please upload an image.")
# Resize so longest side is 518px (model's native resolution)
w, h = input_image.size
scale = 518 / max(w, h)
new_w = round(w * scale)
new_h = round(h * scale)
resized = input_image.resize((new_w, new_h), Image.LANCZOS)
# Preprocess
inputs = image_processor(images=resized, return_tensors="pt")
# Inference
with torch.no_grad():
outputs = model(**inputs)
# Post-process: interpolate to resized image dimensions
predicted_depth = outputs.predicted_depth.unsqueeze(1)
predicted_depth = torch.nn.functional.interpolate(
predicted_depth,
size=(new_h, new_w),
mode="bicubic",
align_corners=False,
).squeeze()
# Normalize to 0-255
depth = predicted_depth.cpu().numpy()
depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0
depth_image = Image.fromarray(depth.astype(np.uint8))
return depth_image
# Build the Gradio interface
demo = gr.Interface(
fn=estimate_depth,
inputs=gr.Image(type="pil", label="Upload Image"),
outputs=gr.Image(type="pil", label="Depth Map"),
title="Depth Anything V2 — Depth Estimation",
description="Upload an image to generate a depth map. Lighter areas are closer, darker areas are farther.",
examples=[],
)
if __name__ == "__main__":
demo.launch()