Spaces:

niye4
/

depthmap

Build error

App Files Files Community

niye4 committed on Nov 29, 2025

Commit

fdc64d6

verified ·

1 Parent(s): 73ad184

Update app.py

Browse files

Files changed (1) hide show

app.py +65 -46

app.py CHANGED Viewed

@@ -1,46 +1,82 @@
 import os
 import cv2
 import torch
-import tempfile
-import shutil
-import subprocess
 from PIL import Image
 import gradio as gr
 from gradio_imageslider import ImageSlider
 from depth_anything_v2.dpt import DepthAnythingV2
 # Use the GPU when torch reports CUDA as available, otherwise the CPU.
 DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
-# Model config (vitb local)
 model_configs = {
-    'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]}
 }
-encoder = 'vitb'
 # Build the network from the kwargs registered for the selected encoder.
 model = DepthAnythingV2(**model_configs[encoder])
-checkpoint_path = f"checkpoints/depth_anything_v2_{encoder}.pth"
 # The checkpoint is read onto the CPU first; the model is moved to DEVICE
 # and switched to eval mode afterwards.
-state_dict = torch.load(checkpoint_path, map_location="cpu")
 model.load_state_dict(state_dict)
 model = model.to(DEVICE).eval()
 def predict_depth(frame_rgb):
     # Thin wrapper: hands the frame to the module-level model's infer_image()
     # and returns its result unchanged. The caller in this file converts the
     # frame from BGR to RGB before calling.
     return model.infer_image(frame_rgb)
 def process_video(video_file):
     # Pre-commit pipeline: save one normalised grayscale depth PNG per frame
     # into a temporary directory, then let ffmpeg encode the PNG sequence to
     # output/<name>_depth.mp4. Returns (slider_frames, output_video).
     # Raises RuntimeError when the capture cannot be opened or reports 0 frames.
-    temp_dir = tempfile.mkdtemp()
-    cap = cv2.VideoCapture(video_file.name)
     if not cap.isOpened() or int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) == 0:
         raise RuntimeError("Cannot open video or empty video file.")
     fps = cap.get(cv2.CAP_PROP_FPS)
-    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
     height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
     slider_frames = []
-    max_slider = 30
     # Every step-th frame is kept for the preview; step is never below 1.
-    step = max(1, total_frames // max_slider)
     idx = 0
-    frame_idx = 0
     while True:
         ret, frame = cap.read()
         # NOTE(review): the diff skips one source line between its two hunks
         # here, presumably the end-of-stream check guarding the break below -
         # confirm against the full file.
@@ -48,48 +84,31 @@ def process_video(video_file):
             break
         # Convert the decoded frame from BGR to RGB before inference.
         frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
         depth_map = predict_depth(frame_rgb)
-        # Normalize to 0-255 grayscale
         # NOTE(review): if a depth map is constant, max() - min() is 0 and the
         # division below has a zero denominator - consider guarding it.
-        depth_gray = ((depth_map - depth_map.min()) / (depth_map.max() - depth_map.min()) * 255.0).astype('uint8')
-        img = Image.fromarray(depth_gray)
-        frame_path = os.path.join(temp_dir, f"{frame_idx:05d}.png")
-        img.save(frame_path)
-        frame_idx += 1
-        # Slider preview
         if idx % step == 0:
-            slider_frames.append(img)
         idx += 1
     cap.release()
-    # Output MP4 path
-    output_dir = "output"
-    os.makedirs(output_dir, exist_ok=True)
-    output_video = os.path.join(output_dir, os.path.basename(video_file.name).replace(".mp4","_depth.mp4"))
-    # FFmpeg encode PNG sequence → MP4, keep FPS & resolution
-    cmd = [
-        "ffmpeg",
-        "-y",
-        "-framerate", str(fps),
-        "-i", os.path.join(temp_dir, "%05d.png"),
-        "-c:v", "libx264",
-        "-pix_fmt", "yuv420p",
-        output_video
-    ]
     # check=True turns a non-zero ffmpeg exit status into an exception.
-    subprocess.run(cmd, check=True)
     # NOTE(review): temp_dir is only removed on this success path; an
     # exception raised earlier leaves the PNG directory behind.
-    shutil.rmtree(temp_dir)
     return slider_frames, output_video
 with gr.Blocks() as demo:
     # Page layout: title, description, upload widget, preview slider, result
     # video and the trigger button.
-    gr.Markdown("# Depth Anything V2 – Grayscale Video")
-    gr.Markdown("Upload a video and get a grayscale DepthMap video at original resolution & FPS.")
-    video_input = gr.File(label="Upload MP4", file_types=[".mp4"])
     depth_slider = ImageSlider(label="DepthMap Slider Preview")
     video_output = gr.Video(label="DepthMap Video")
     submit = gr.Button("Render DepthMap")
     # process_video's two return values feed the slider and the video
     # player, in that order.
     submit.click(fn=process_video, inputs=[video_input], outputs=[depth_slider, video_output])
 if __name__ == "__main__":

 import os
+import shutil
 import cv2
+import numpy as np
 import torch
 from PIL import Image
 import gradio as gr
 from gradio_imageslider import ImageSlider
 from depth_anything_v2.dpt import DepthAnythingV2
+from huggingface_hub import hf_hub_download
+# ===============================
+# Auto-download checkpoint if missing
+# ===============================
 # Runs at import time: the checkpoint is fetched only when MODEL_PATH does
 # not exist yet; later starts reuse the file on disk.
+MODEL_PATH = "checkpoints/depth_anything_v2_vitl.pth"
+if not os.path.exists(MODEL_PATH):
+    print("Downloading Depth Anything V2 model (~1.3GB), please wait 1-3 minutes...")
     # Assumes hf_hub_download places the file directly under local_dir so
     # that it ends up at MODEL_PATH - TODO confirm.
     # NOTE(review): local_dir_use_symlinks is reported as deprecated in newer
     # huggingface_hub releases - check it against the installed version.
+    hf_hub_download(
+        repo_id="niye4/depthmap-checkpoints",   # Repo containing checkpoint
+        filename="depth_anything_v2_vitl.pth",  # Actual filename
+        local_dir="checkpoints",
+        local_dir_use_symlinks=False
+    )
+    print("Model download complete! Starting the app...")
+else:
+    print("Model already exists, starting the app immediately!")
+# ===============================
+# Device and Model Setup
+# ===============================
 # Use the GPU when torch reports CUDA as available, otherwise the CPU.
 DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
 # Constructor kwargs per encoder name; only the entry selected by `encoder`
 # below is used in this file.
 model_configs = {
+    'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
+    'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
+    'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
+    'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]}
 }
 # NOTE(review): MODEL_PATH is hard-coded to the vitl checkpoint, so changing
 # `encoder` alone would pair another architecture with the vitl weights.
+encoder = 'vitl'
 model = DepthAnythingV2(**model_configs[encoder])
 # The checkpoint is read onto the CPU first; the model is moved to DEVICE
 # and switched to eval mode afterwards.
+state_dict = torch.load(MODEL_PATH, map_location="cpu")
 model.load_state_dict(state_dict)
 model = model.to(DEVICE).eval()
+# ===============================
+# Depth prediction for one frame
+# ===============================
 def predict_depth(frame_rgb):
     # Thin wrapper: hands the frame to the module-level model's infer_image()
     # and returns its result unchanged. The caller in this file converts the
     # frame from BGR to RGB before calling.
     return model.infer_image(frame_rgb)
+# ===============================
+# Process video
+# ===============================
 def process_video(video_file):
     # Copies the uploaded file into output/, runs depth prediction on every
     # frame, writes the normalised grayscale depth frames to
     # output/<name>_depth.mp4 with OpenCV's VideoWriter, and collects a
     # sample of frames as PIL images for the preview slider.
     # Returns (slider_frames, output_video).
     # Raises RuntimeError when the capture cannot be opened or reports 0 frames.
+    OUTPUT_DIR = "output"
+    os.makedirs(OUTPUT_DIR, exist_ok=True)
+    video_path = os.path.join(OUTPUT_DIR, os.path.basename(video_file.name))
+    shutil.copy(video_file.name, video_path)
+    cap = cv2.VideoCapture(video_path)
     # NOTE(review): the capture is not released before this raise.
     if not cap.isOpened() or int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) == 0:
         raise RuntimeError("Cannot open video or empty video file.")
     fps = cap.get(cv2.CAP_PROP_FPS)
+    width  = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
     height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
     # NOTE(review): replace() is a no-op when the name does not contain the
     # exact substring ".mp4" (for example ".MP4"); output_video would then
     # equal video_path, the file that is being read.
+    output_video = os.path.join(OUTPUT_DIR, os.path.basename(video_path).replace(".mp4","_depth.mp4"))
+    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
     # The writer is opened with the source fps and frame size.
     # NOTE(review): assumes predict_depth returns a map with the same
     # height/width as the input frame - TODO confirm.
+    out = cv2.VideoWriter(output_video, fourcc, fps, (width,height), isColor=True)
+    # Prepare slider preview frames
     slider_frames = []
+    max_slider_frames = 30
+    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
     # Every step-th frame is kept for the preview; step is never below 1.
+    step = max(1, total_frames // max_slider_frames)
     idx = 0
     while True:
         ret, frame = cap.read()
         # NOTE(review): the diff skips one source line between its two hunks
         # here, presumably the end-of-stream check guarding the break below -
         # confirm against the full file.
             break
         # Convert the decoded frame from BGR to RGB before inference.
         frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
         depth_map = predict_depth(frame_rgb)
         # Min-max scale the depth map to 0-255 and cast to uint8.
         # NOTE(review): if a depth map is constant, max() - min() is 0 and the
         # division below has a zero denominator - consider guarding it.
+        depth_gray = ((depth_map - depth_map.min()) / (depth_map.max() - depth_map.min()) * 255.0).astype(np.uint8)
         # Expand the single channel to three (BGR order, despite the name)
         # because the writer was opened with isColor=True.
+        depth_rgb = cv2.cvtColor(depth_gray, cv2.COLOR_GRAY2BGR)
+        out.write(depth_rgb)
+        # Add frame to slider preview
         if idx % step == 0:
+            slider_frames.append(Image.fromarray(depth_gray))
         idx += 1
     cap.release()
+    out.release()
     # NOTE(review): the whole list of previews is returned for the ImageSlider
     # output - confirm the component accepts a list of this length.
     return slider_frames, output_video
+# ===============================
+# Gradio Interface
+# ===============================
 with gr.Blocks() as demo:
     # Page layout: title, description, upload widget, preview slider, result
     # video and the trigger button.
+    gr.Markdown("# Depth Anything V2 – Grayscale Video (vitl)")
+    gr.Markdown("Upload a video and get a grayscale DepthMap video at original resolution and FPS.")
+    video_input = gr.File(label="Upload MP4", file_types=['.mp4'])
     depth_slider = ImageSlider(label="DepthMap Slider Preview")
     video_output = gr.Video(label="DepthMap Video")
     submit = gr.Button("Render DepthMap")
     # process_video's two return values feed the slider and the video
     # player, in that order.
     submit.click(fn=process_video, inputs=[video_input], outputs=[depth_slider, video_output])
 if __name__ == "__main__":