Spaces:

xingyang1
/

Distill-Any-Depth

Build error

App Files Community

Update app.py

by luluscharf - opened Mar 18, 2025

base: refs/heads/main

←

from: refs/pr/7

Discussion Files changed

+20

-16

Files changed (1) hide show

app.py +20 -16

app.py CHANGED Viewed

@@ -17,14 +17,14 @@ import tempfile
 def load_model_by_name(arch_name, checkpoint_path, device):
     model = None
     if arch_name == 'depthanything':
-        # 使用 safetensors 加载模型权重
-        model_weights = load_file(checkpoint_path)  # safetensors 加载方式
-        # 初始化模型
         model = DepthAnything(checkpoint_path=None).to(device)
-        model.load_state_dict(model_weights)  # 将加载的权重应用到模型
-        model = model.to(device)  # 确保模型在正确的设备上
     else:
         raise NotImplementedError(f"Unknown architecture: {arch_name}")
     return model
@@ -37,8 +37,14 @@ def process_image(image, model, device):
     # Preprocess the image
     image_np = np.array(image)[..., ::-1] / 255
     transform = Compose([
-        Resize(756, 756, resize_target=False, keep_aspect_ratio=True, ensure_multiple_of=14, resize_method='lower_bound', image_interpolation_method=cv2.INTER_CUBIC),
         NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
         PrepareForNet()
     ])
@@ -53,8 +59,9 @@ def process_image(image, model, device):
     # Convert depth map to numpy
     pred_disp_np = pred_disp.cpu().detach().numpy()[0, 0, :, :]
-    # Normalize depth map
     pred_disp_normalized = (pred_disp_np - pred_disp_np.min()) / (pred_disp_np.max() - pred_disp_np.min())
     # Colorized depth map
     cmap = "Spectral_r"
@@ -68,19 +75,16 @@ def process_image(image, model, device):
     # Save raw depth map as a temporary npy file
     with tempfile.NamedTemporaryFile(delete=False, suffix=".npy") as temp_file:
-        np.save(temp_file.name, pred_disp_normalized)
         depth_raw_path = temp_file.name
     # Resize outputs to match original image size
-    h, w = image_np.shape[:2]
-    depth_colored_hwc = cv2.resize(depth_colored_hwc, (w, h), cv2.INTER_LINEAR)
-    depth_gray_hwc = cv2.resize(depth_gray_hwc, (w, h), cv2.INTER_LINEAR)
     # Convert to PIL images
     return image, Image.fromarray(depth_colored_hwc), Image.fromarray(depth_gray_hwc), depth_raw_path
 # Gradio interface function with GPU support
 @spaces.GPU
 def gradio_interface(image):
@@ -108,10 +112,10 @@ def gradio_interface(image):
     model = DepthAnything(**model_kwargs['vitl']).to(device)
     checkpoint_path = hf_hub_download(repo_id=f"xingyang1/Distill-Any-Depth", filename=f"large/model.safetensors", repo_type="model")
-    # 使用 safetensors 加载模型权重
-    model_weights = load_file(checkpoint_path)  # safetensors 加载方式
     model.load_state_dict(model_weights)
-    model = model.to(device)  # 确保模型在正确的设备上
     if model is None:
         return None, None, None, None

 def load_model_by_name(arch_name, checkpoint_path, device):
     model = None
     if arch_name == 'depthanything':
+        # Use safetensors to load model weights
+        model_weights = load_file(checkpoint_path)  # Load using safetensors
+        # Initialize model
         model = DepthAnything(checkpoint_path=None).to(device)
+        model.load_state_dict(model_weights)  # Apply loaded weights to the model
+        model = model.to(device)  # Ensure the model is on the correct device
     else:
         raise NotImplementedError(f"Unknown architecture: {arch_name}")
     return model
     # Preprocess the image
     image_np = np.array(image)[..., ::-1] / 255
+    # Resize input image to 1920p while maintaining aspect ratio
+    h, w = image_np.shape[:2]
+    scale = 1920 / max(h, w)
+    new_h, new_w = int(h * scale), int(w * scale)
+    image_np = cv2.resize(image_np, (new_w, new_h), interpolation=cv2.INTER_CUBIC)
     transform = Compose([
+        Resize(new_h, new_w, resize_target=False, keep_aspect_ratio=True, ensure_multiple_of=14, resize_method='lower_bound', image_interpolation_method=cv2.INTER_CUBIC),
         NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
         PrepareForNet()
     ])
     # Convert depth map to numpy
     pred_disp_np = pred_disp.cpu().detach().numpy()[0, 0, :, :]
+    # Normalize depth map to 16-bit range [0, 65535]
     pred_disp_normalized = (pred_disp_np - pred_disp_np.min()) / (pred_disp_np.max() - pred_disp_np.min())
+    pred_disp_16bit = (pred_disp_normalized * 65535).astype(np.uint16)
     # Colorized depth map
     cmap = "Spectral_r"
     # Save raw depth map as a temporary npy file
     with tempfile.NamedTemporaryFile(delete=False, suffix=".npy") as temp_file:
+        np.save(temp_file.name, pred_disp_16bit)
         depth_raw_path = temp_file.name
     # Resize outputs to match original image size
+    depth_colored_hwc = cv2.resize(depth_colored_hwc, (new_w, new_h), cv2.INTER_LINEAR)
+    depth_gray_hwc = cv2.resize(depth_gray_hwc, (new_w, new_h), cv2.INTER_LINEAR)
     # Convert to PIL images
     return image, Image.fromarray(depth_colored_hwc), Image.fromarray(depth_gray_hwc), depth_raw_path
 # Gradio interface function with GPU support
 @spaces.GPU
 def gradio_interface(image):
     model = DepthAnything(**model_kwargs['vitl']).to(device)
     checkpoint_path = hf_hub_download(repo_id=f"xingyang1/Distill-Any-Depth", filename=f"large/model.safetensors", repo_type="model")
+    # Use safetensors to load model weights
+    model_weights = load_file(checkpoint_path)  # Load using safetensors
     model.load_state_dict(model_weights)
+    model = model.to(device)  # Ensure the model is on the correct device
     if model is None:
         return None, None, None, None