Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,63 +1,149 @@
|
|
|
|
|
| 1 |
import os
|
|
|
|
|
|
|
| 2 |
import torch
|
|
|
|
| 3 |
import gradio as gr
|
| 4 |
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
# ---------------------------------------------------------
|
| 9 |
-
# CPU-safe DepthAnything3 loader
|
| 10 |
-
# ---------------------------------------------------------
|
| 11 |
-
|
| 12 |
-
def load_model_cpu(model_dir):
|
| 13 |
-
print("🔄 Loading DepthAnything3 model on CPU...")
|
| 14 |
-
model = DepthAnything3.from_pretrained(model_dir, config_name="config.json")
|
| 15 |
-
model.to("cpu")
|
| 16 |
-
model.eval()
|
| 17 |
-
print("✅ Model ready on CPU")
|
| 18 |
-
return model
|
| 19 |
-
|
| 20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
MODEL_DIR = os.environ.get("DA3_MODEL_DIR", "depth-anything/DA3NESTED-GIANT-LARGE")
|
| 22 |
-
model = load_model_cpu(MODEL_DIR)
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
# ---------------------------------------------------------
|
| 26 |
-
# CPU-safe inference (single image only)
|
| 27 |
-
# ---------------------------------------------------------
|
| 28 |
|
| 29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
"""
|
| 31 |
-
|
| 32 |
-
|
| 33 |
"""
|
| 34 |
-
if
|
| 35 |
return None
|
| 36 |
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
#
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
|
| 54 |
-
|
|
|
|
| 55 |
fn=run_depth,
|
| 56 |
-
inputs=
|
| 57 |
-
|
|
|
|
|
|
|
|
|
|
| 58 |
title=title,
|
| 59 |
description=description,
|
| 60 |
)
|
| 61 |
|
|
|
|
| 62 |
if __name__ == "__main__":
|
| 63 |
-
|
|
|
|
| 1 |
+
# app.py (safe CPU startup for HF Spaces)
|
| 2 |
import os
|
| 3 |
+
import io
|
| 4 |
+
import numpy as np
|
| 5 |
import torch
|
| 6 |
+
from PIL import Image
|
| 7 |
import gradio as gr
|
| 8 |
|
| 9 |
+
# Import the CPU-patched class you added earlier
|
| 10 |
+
from depth_anything_3.api import DepthAnything3
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
+
# --- Configuration -------------------------------------------------
# Hub repo (or local checkpoint dir) to load; override with DA3_MODEL_DIR.
MODEL_DIR = os.environ.get("DA3_MODEL_DIR", "depth-anything/DA3NESTED-GIANT-LARGE")

# Working resolution for inference. Kept small so CPU runs stay feasible;
# raising it improves quality but is much slower. Override via env.
PROCESS_RES = int(os.environ.get("DA3_PROCESS_RES", "384"))
|
| 21 |
+
|
| 22 |
+
# ---------------------------
|
| 23 |
+
# Model loading (CPU)
|
| 24 |
+
# ---------------------------
|
| 25 |
+
print(f"🔄 Loading DepthAnything3 from '{MODEL_DIR}' on CPU (this may take a moment)...")
|
| 26 |
+
# Uses the PyTorchModelHubMixin.from_pretrained you have in the class
|
| 27 |
+
model = DepthAnything3.from_pretrained(MODEL_DIR)
|
| 28 |
+
model.to(torch.device("cpu"))
|
| 29 |
+
model.eval()
|
| 30 |
+
print("✅ Model ready on CPU")
|
| 31 |
+
|
| 32 |
+
# ---------------------------
|
| 33 |
+
# Inference helper
|
| 34 |
+
# ---------------------------
|
| 35 |
+
def _normalize_depth_to_uint8(depth: np.ndarray) -> np.ndarray:
|
| 36 |
+
"""Normalize a depth map (H,W) to uint8 grayscale for display."""
|
| 37 |
+
if depth is None:
|
| 38 |
+
return None
|
| 39 |
+
# convert to float
|
| 40 |
+
d = depth.astype(np.float32)
|
| 41 |
+
# clip NaNs / infs
|
| 42 |
+
d = np.nan_to_num(d, nan=0.0, posinf=0.0, neginf=0.0)
|
| 43 |
+
# Normalize robustly: use 1st and 99th percentiles to avoid outliers
|
| 44 |
+
vmin = np.percentile(d, 1.0)
|
| 45 |
+
vmax = np.percentile(d, 99.0)
|
| 46 |
+
if vmax - vmin < 1e-6:
|
| 47 |
+
vmax = vmin + 1.0
|
| 48 |
+
d = (d - vmin) / (vmax - vmin)
|
| 49 |
+
d = np.clip(d, 0.0, 1.0)
|
| 50 |
+
img = (d * 255.0).astype(np.uint8)
|
| 51 |
+
return img
|
| 52 |
+
|
| 53 |
+
def run_depth(single_img: Image.Image, process_res: int = PROCESS_RES):
    """Run single-image depth inference with the patched DepthAnything3 API.

    Args:
        single_img: Input photo as a PIL image; None (no upload) yields None.
        process_res: Working resolution passed to the model. Gradio's slider
            delivers a float, so it is coerced to int here.

    Returns:
        A grayscale ("L") PIL image visualizing depth, a white RGB fallback
        image when no depth could be extracted, or None for no input.
    """
    if single_img is None:
        return None

    # Gradio sliders emit floats; the model expects an integer resolution.
    process_res = int(process_res)

    try:
        # Single-image batch; keep other args minimal to avoid heavy work.
        pred = model.inference(
            [single_img],
            process_res=process_res,
            process_res_method="upper_bound_resize",
            export_format="mini_npz",  # minimal export
        )
    except Exception as e:
        msg = f"Inference error: {e}"
        print(msg)
        # Blank placeholder so the UI shows something instead of crashing;
        # the actual error text goes to the Space logs above.
        err_img = Image.new("RGB", (640, 120), color=(255, 255, 255))
        return err_img

    # Extract depth from the Prediction object — tolerate several layouts.
    depth_map = None
    if hasattr(pred, "depth"):
        depth_map = pred.depth
    elif isinstance(pred, dict) and "depth" in pred:
        depth_map = pred["depth"]
    elif hasattr(pred, "predictions") and len(pred.predictions) > 0:
        # Fallback: some wrappers store a list of per-image predictions.
        first = pred.predictions[0]
        depth_map = first.depth if hasattr(first, "depth") else None

    if depth_map is None:
        # Fallback: show the processed input image as a visual sanity check.
        try:
            if hasattr(pred, "processed_images"):
                imgs = pred.processed_images
                if isinstance(imgs, np.ndarray) and imgs.shape[0] > 0:
                    return Image.fromarray((imgs[0] * 255).astype(np.uint8))
        except Exception:
            pass
        # Nothing usable in the prediction.
        print("No depth found in prediction; returning empty image.")
        return Image.new("RGB", (640, 480), color=(255, 255, 255))

    # Unwrap list/tuple batch containers first.
    if isinstance(depth_map, (list, tuple)):
        depth_map = depth_map[0]
    # Convert torch tensors to numpy BEFORE the shape checks below — the
    # original checked np.ndarray shapes first, so a batched torch tensor
    # of shape (1, H, W) was never squeezed and broke Image.fromarray.
    if isinstance(depth_map, torch.Tensor):
        depth_map = depth_map.detach().cpu().numpy()
    depth_map = np.asarray(depth_map)
    # (1, H, W) batch of one -> (H, W).
    if depth_map.ndim == 3 and depth_map.shape[0] == 1:
        depth_map = depth_map[0]
    # (3, H, W) channel-first -> average channels down to one plane.
    if depth_map.ndim == 3 and depth_map.shape[0] == 3:
        depth_map = depth_map.mean(axis=0)

    depth_uint8 = _normalize_depth_to_uint8(depth_map)
    if depth_uint8 is None:
        return Image.new("RGB", (640, 480), color=(255, 255, 255))

    # Grayscale visualization for the Gradio output component.
    depth_img = Image.fromarray(depth_uint8, mode="L")
    return depth_img
|
| 125 |
+
|
| 126 |
+
# ---------------------------
# Gradio interface
# ---------------------------
title = "Depth Anything 3 — CPU (single-image)"
description = (
    "CPU-only minimal interface. Upload a single image and get a quick depth visualization.\n"
    "This Space is intentionally lightweight to allow CPU startup. For better quality/multiview features you need GPU or the full app."
)

# Make the Gradio Interface the top-level `app` variable so HF Spaces detects it
app = gr.Interface(
    fn=run_depth,
    # Inputs mirror run_depth's (single_img, process_res) signature.
    inputs=[
        gr.Image(type="pil", label="Upload image"),
        # NOTE(review): gr.Slider delivers a float value; confirm run_depth
        # tolerates a non-int process_res before relying on this control.
        gr.Slider(minimum=128, maximum=1024, step=64, value=PROCESS_RES, label="Process resolution (smaller = faster)")
    ],
    outputs=gr.Image(label="Predicted depth (grayscale)"),
    title=title,
    description=description,
)

# For local running (Spaces launches `app` itself; this guard is for dev use)
if __name__ == "__main__":
    app.launch(server_name="0.0.0.0", server_port=7860)
|