niye4 committed on
Commit
9eeb5d9
·
verified ·
1 Parent(s): 3879970

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -18
app.py CHANGED
@@ -1,5 +1,4 @@
1
  import os
2
- import shutil
3
  import cv2
4
  import numpy as np
5
  import torch
@@ -7,13 +6,14 @@ from PIL import Image
7
  import gradio as gr
8
  from gradio_imageslider import ImageSlider
9
  from depth_anything_v2.dpt import DepthAnythingV2
 
 
10
 
11
  # ===============================
12
  # Device & Model
13
  # ===============================
14
  DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
15
 
16
- # Only vitb
17
  MODEL_PATH = "checkpoints/depth_anything_v2_vitb.pth"
18
  model = DepthAnythingV2(encoder='vitb', features=128, out_channels=[96,192,384,768])
19
  state_dict = torch.load(MODEL_PATH, map_location="cpu")
@@ -24,20 +24,35 @@ model = model.to(DEVICE).eval()
24
  # Predict depth for single frame
25
  # ===============================
26
  def predict_depth(frame_rgb):
27
- return model.infer_image(frame_rgb)
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
  # ===============================
30
  # Process video
31
  # ===============================
32
  def process_video(video_file):
33
  """
34
- Render a grayscale DepthMap video from uploaded MP4.
35
- Only vitb model, fast, high quality for vitb.
36
  """
37
  OUTPUT_DIR = "output"
38
  os.makedirs(OUTPUT_DIR, exist_ok=True)
39
 
40
  video_path = os.path.join(OUTPUT_DIR, os.path.basename(video_file.name))
 
 
41
  shutil.copy(video_file.name, video_path)
42
 
43
  cap = cv2.VideoCapture(video_path)
@@ -48,10 +63,12 @@ def process_video(video_file):
48
  width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
49
  height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
50
 
 
51
  output_video_path = os.path.join(OUTPUT_DIR, os.path.basename(video_path).replace(".mp4","_depth.mp4"))
52
- fourcc = cv2.VideoWriter_fourcc(*'mp4v') # browser-compatible MP4
53
  out = cv2.VideoWriter(output_video_path, fourcc, fps, (width,height), isColor=True)
54
 
 
55
  slider_frames = []
56
  max_slider_frames = 30
57
  total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
@@ -66,17 +83,13 @@ def process_video(video_file):
66
  frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
67
  depth_map = predict_depth(frame_rgb)
68
 
69
- # Keep 16-bit depth for video output (preserve details)
70
- depth_16bit = depth_map.astype(np.uint16)
71
-
72
- # Scale for preview slider (8-bit)
73
- depth_8bit = ((depth_16bit / depth_16bit.max()) * 255).astype(np.uint8)
74
- depth_rgb_preview = cv2.cvtColor(depth_8bit, cv2.COLOR_GRAY2BGR)
75
- out.write(depth_rgb_preview)
76
 
77
- # Add sampled frames for slider
78
  if idx % step == 0:
79
- slider_frames.append(Image.fromarray(depth_8bit))
80
  idx += 1
81
 
82
  cap.release()
@@ -87,10 +100,11 @@ def process_video(video_file):
87
  # Gradio Interface
88
  # ===============================
89
  with gr.Blocks() as demo:
90
- gr.Markdown("# Depth Anything V2 – Grayscale Video (vitb)")
91
  gr.Markdown(
92
- "Upload an MP4 video and generate a grayscale DepthMap video.\n\n"
93
- "**Model:** vitb – fast and high quality for real-time preview."
 
94
  )
95
 
96
  video_input = gr.File(label="Upload MP4", file_types=['.mp4'])
 
1
  import os
 
2
  import cv2
3
  import numpy as np
4
  import torch
 
6
  import gradio as gr
7
  from gradio_imageslider import ImageSlider
8
  from depth_anything_v2.dpt import DepthAnythingV2
9
+ import matplotlib.pyplot as plt
10
+ import matplotlib
11
 
12
  # ===============================
13
  # Device & Model
14
  # ===============================
15
  DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
16
 
 
17
  MODEL_PATH = "checkpoints/depth_anything_v2_vitb.pth"
18
  model = DepthAnythingV2(encoder='vitb', features=128, out_channels=[96,192,384,768])
19
  state_dict = torch.load(MODEL_PATH, map_location="cpu")
 
24
  # Predict depth for single frame
25
  # ===============================
26
def predict_depth(frame_rgb):
    """Infer a depth map for a single RGB frame and return it as float32."""
    # Delegates to the module-level DepthAnythingV2 model; cast keeps the
    # downstream normalization math in a consistent dtype.
    return model.infer_image(frame_rgb).astype(np.float32)
30
+
31
# ===============================
# Colormap for preview
# ===============================
# 'magma' is a perceptually uniform colormap, well suited to depth preview.
# NOTE: matplotlib.cm.get_cmap() was deprecated in matplotlib 3.7 and removed
# in 3.9; matplotlib.colormaps[...] is the supported registry lookup.
cmap = matplotlib.colormaps['magma']

def apply_colormap(depth):
    """Normalize a depth map to [0, 1], apply the colormap, return uint8 RGB.

    Parameters:
        depth: 2-D float array of per-pixel depth values.

    Returns:
        (H, W, 3) uint8 RGB array, suitable for video frames / PIL images.
    """
    # Min-max normalize; the epsilon guards against a constant (flat) frame,
    # which would otherwise divide by zero.
    norm = (depth - depth.min()) / (depth.max() - depth.min() + 1e-8)
    # cmap() yields an (H, W, 4) RGBA float array in [0, 1]; drop the alpha
    # channel and scale to 8-bit.
    return (cmap(norm)[:, :, :3] * 255).astype(np.uint8)
41
 
42
  # ===============================
43
  # Process video
44
  # ===============================
45
  def process_video(video_file):
46
  """
47
+ Render depthmap video with colormap.
48
+ Keep original resolution & FPS.
49
  """
50
  OUTPUT_DIR = "output"
51
  os.makedirs(OUTPUT_DIR, exist_ok=True)
52
 
53
  video_path = os.path.join(OUTPUT_DIR, os.path.basename(video_file.name))
54
+ # Copy input video
55
+ import shutil
56
  shutil.copy(video_file.name, video_path)
57
 
58
  cap = cv2.VideoCapture(video_path)
 
63
  width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
64
  height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
65
 
66
+ # Video output path
67
  output_video_path = os.path.join(OUTPUT_DIR, os.path.basename(video_path).replace(".mp4","_depth.mp4"))
68
+ fourcc = cv2.VideoWriter_fourcc(*'mp4v')
69
  out = cv2.VideoWriter(output_video_path, fourcc, fps, (width,height), isColor=True)
70
 
71
+ # Slider preview (sample frames)
72
  slider_frames = []
73
  max_slider_frames = 30
74
  total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
 
83
  frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
84
  depth_map = predict_depth(frame_rgb)
85
 
86
+ # Apply colormap for video output
87
+ colored_frame = apply_colormap(depth_map)
88
+ out.write(cv2.cvtColor(colored_frame, cv2.COLOR_RGB2BGR))
 
 
 
 
89
 
90
+ # Add sampled frames for slider preview
91
  if idx % step == 0:
92
+ slider_frames.append(Image.fromarray(colored_frame))
93
  idx += 1
94
 
95
  cap.release()
 
100
  # Gradio Interface
101
  # ===============================
102
  with gr.Blocks() as demo:
103
+ gr.Markdown("# Depth Anything V2 – Depth Video (vitb)")
104
  gr.Markdown(
105
+ "Upload an MP4 video to generate a **colored DepthMap video**.\n\n"
106
+ "**Model:** vitb – fast and high quality for real-time processing.\n"
107
+ "Resolution and FPS are preserved from the original video."
108
  )
109
 
110
  video_input = gr.File(label="Upload MP4", file_types=['.mp4'])