jiggle-physics / depth.py
Justin Wood
Initial backend
c401d3e
import numpy as np
import torch
from PIL import Image
# Module-level cache for the depth-estimation pipeline. It starts out empty
# and is filled in by get_depth_model() on its first call, then reused.
_depth_cache = None
def get_depth_model():
    """Return the shared depth-estimation pipeline, creating it on first use.

    The pipeline is stored in the module-level ``_depth_cache`` so that later
    calls hand back the same object instead of constructing it again.
    """
    global _depth_cache

    # Fast path: the pipeline has already been built by an earlier call.
    if _depth_cache is not None:
        return _depth_cache

    # Imported here rather than at module level so that merely importing this
    # module does not pull in transformers.
    from transformers import pipeline as hf_pipeline

    # First CUDA device when one is available, otherwise CPU (-1).
    device_index = 0 if torch.cuda.is_available() else -1

    # Depth Pro from Apple — best metric depth, ~600MB
    _depth_cache = hf_pipeline(
        "depth-estimation",
        model="apple/DepthPro-hf",
        device=device_index,
    )
    return _depth_cache
def estimate_depth(image: Image.Image) -> dict:
    """Estimate a per-pixel depth map for ``image``.

    Args:
        image: Source picture; its ``width`` and ``height`` define the size of
            the returned depth map.

    Returns:
        ``{"depth": [[float]], "width": int, "height": int,
        "min": float, "max": float}`` where ``depth`` is a row-major nested
        list with ``height`` rows and ``width`` columns, and ``min``/``max``
        are the extremes of that map.

        When the pipeline result contains ``"predicted_depth"`` the values are
        the model's own output (metric meters for Depth Pro). Otherwise the
        function falls back to the ``"depth"`` entry, which the Hugging Face
        pipeline rescales for display, so those values are only relative.
    """
    pipe = get_depth_model()
    result = pipe(image)

    # Prefer the unscaled model output: the "depth" entry is a visualisation
    # image normalised to 0-255, which would silently discard metric units.
    if "predicted_depth" in result and result["predicted_depth"] is not None:
        raw_depth = result["predicted_depth"]
    else:
        raw_depth = result["depth"]  # PIL image or numpy array

    if isinstance(raw_depth, torch.Tensor):
        # Move off the accelerator and widen half precision before NumPy.
        raw_depth = raw_depth.detach().cpu().float().numpy()
    arr = np.asarray(raw_depth, dtype=np.float32)

    # Drop leading batch/channel axes of length 1 so the map is (H, W).
    while arr.ndim > 2 and arr.shape[0] == 1:
        arr = arr[0]

    # Resize to match source image if needed
    if arr.shape[:2] != (image.height, image.width):
        depth_pil = Image.fromarray(arr).resize(
            (image.width, image.height), Image.BILINEAR
        )
        arr = np.asarray(depth_pil, dtype=np.float32)

    return {
        "depth": arr.tolist(),
        "width": image.width,
        "height": image.height,
        "min": float(arr.min()),
        "max": float(arr.max()),
    }