Update app.py
app.py CHANGED
@@ -4,6 +4,9 @@ import numpy as np
 import torch
 from PIL import Image
 import gradio as gr
+import subprocess
+import shutil
+
 from depth_anything_v2.dpt import DepthAnythingV2
 
 # ===============================
@@ -13,6 +16,7 @@ DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
 
 MODEL_PATH = "checkpoints/depth_anything_v2_vitb.pth"
 model = DepthAnythingV2(encoder='vitb', features=128, out_channels=[96,192,384,768])
+
 state_dict = torch.load(MODEL_PATH, map_location="cpu")
 model.load_state_dict(state_dict)
 model = model.to(DEVICE).eval()
@@ -21,90 +25,118 @@ model = model.to(DEVICE).eval()
 # Predict depth for single frame
 # ===============================
 def predict_depth(frame_rgb):
-    """Return depth map as float32"""
     depth = model.infer_image(frame_rgb)
     return depth.astype(np.float32)
 
 # ===============================
-# Normalize
+# Normalize grayscale (0–255)
 # ===============================
 def depth_to_grayscale(depth):
-
-
-    return
+    dmin, dmax = depth.min(), depth.max()
+    norm = (depth - dmin) / (dmax - dmin + 1e-8)
+    return (norm * 255).astype(np.uint8)
 
 # ===============================
-#
+# Main video processing (FFMPEG)
 # ===============================
 def process_video(video_file):
-
-
-
-
+
+    # -----------------------------
+    # Setup directories
+    # -----------------------------
+    INPUT_PATH = video_file.name
+    WORKDIR = "workspace"
+    FRAMES_DIR = "workspace/frames"
+    OUT_FRAMES_DIR = "workspace/depth_frames"
     OUTPUT_DIR = "output"
-    os.makedirs(OUTPUT_DIR, exist_ok=True)
 
-
-
-
-
+    shutil.rmtree(WORKDIR, ignore_errors=True)
+    os.makedirs(FRAMES_DIR, exist_ok=True)
+    os.makedirs(OUT_FRAMES_DIR, exist_ok=True)
+    os.makedirs(OUTPUT_DIR, exist_ok=True)
 
-
-
-
+    # Copy input into workspace
+    input_copy = os.path.join(WORKDIR, "input.mp4")
+    shutil.copy(INPUT_PATH, input_copy)
 
+    # -----------------------------
+    # Extract FPS & resolution
+    # -----------------------------
+    cap = cv2.VideoCapture(input_copy)
     fps = cap.get(cv2.CAP_PROP_FPS)
-    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
-    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-
-    # Video output path
-    output_video_path = os.path.join(OUTPUT_DIR, os.path.basename(video_path).replace(".mp4","_depth.mp4"))
-    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-    out = cv2.VideoWriter(output_video_path, fourcc, fps, (width,height), isColor=False)
-
-    # Slider preview (sample frames)
-    slider_frames = []
-    max_slider_frames = 30
-    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-    step = max(1, total_frames // max_slider_frames)
-    idx = 0
-
-    while True:
-        ret, frame = cap.read()
-        if not ret:
-            break
-
-        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-        depth_map = predict_depth(frame_rgb)
-        gray_frame = depth_to_grayscale(depth_map)
-        out.write(gray_frame)
-
-        # Add sampled frames for slider preview
-        if idx % step == 0:
-            slider_frames.append(Image.fromarray(gray_frame))
-        idx += 1
-
     cap.release()
-
-
+
+    # -----------------------------
+    # FFMPEG extract frames
+    # -----------------------------
+    extract_cmd = [
+        "ffmpeg", "-y",
+        "-i", input_copy,
+        f"{FRAMES_DIR}/frame_%06d.png"
+    ]
+    subprocess.run(extract_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+    # -----------------------------
+    # Process each frame → Depth
+    # -----------------------------
+    frames = sorted(os.listdir(FRAMES_DIR))
+    slider_preview = []
+
+    for idx, f in enumerate(frames):
+        frame_path = os.path.join(FRAMES_DIR, f)
+        img = cv2.imread(frame_path)
+
+        rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+        depth = predict_depth(rgb)
+        gray = depth_to_grayscale(depth)
+
+        out_path = os.path.join(OUT_FRAMES_DIR, f)
+        Image.fromarray(gray).save(out_path)
+
+        # For slider preview (limit to 20 images)
+        if idx % max(1, len(frames)//20) == 0:
+            slider_preview.append(Image.fromarray(gray))
+
+    # -----------------------------
+    # Rebuild video with FFMPEG
+    # -----------------------------
+    output_video = os.path.join(OUTPUT_DIR, "depth_output.mp4")
+
+    merge_cmd = [
+        "ffmpeg", "-y",
+        "-framerate", str(fps),
+        "-i", f"{OUT_FRAMES_DIR}/frame_%06d.png",
+        "-c:v", "libx264",
+        "-pix_fmt", "yuv420p",
+        output_video
+    ]
+    subprocess.run(merge_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+    return slider_preview, output_video
+
 
 # ===============================
-# Gradio
+# Gradio UI
 # ===============================
 with gr.Blocks() as demo:
-
+
+    gr.Markdown("# Depth Anything V2 – High-Quality Grayscale Depth Video (vitb)")
     gr.Markdown(
-        "
-        "
-        "
+        "This version uses **FFmpeg frame extraction** for the best possible quality.\n\n"
+        "- ✅ Preserves original resolution & FPS\n"
+        "- ✅ Frame-by-frame depth rendering (sharpest result)\n"
+        "- ✅ Model: **vitb** (fast & clean)\n"
+        "- ✅ Output: grayscale depth video"
     )
 
-    video_input = gr.File(label="Upload MP4", file_types=[
-    depth_slider = gr.Gallery(label="
-
-
+    video_input = gr.File(label="Upload MP4 Video", file_types=[".mp4"])
+    depth_slider = gr.Gallery(label="Preview")
+    output_video = gr.Video(label="Depth Video Output")
+
+    render_btn = gr.Button("Render DepthMap")
+
+    render_btn.click(process_video, inputs=[video_input], outputs=[depth_slider, output_video])
 
-    submit.click(fn=process_video, inputs=[video_input], outputs=[depth_slider, video_output])
 
 if __name__ == "__main__":
     demo.queue().launch()
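
For local verification, a minimal smoke test of the updated FFmpeg-based pipeline can be run outside Gradio. This is only a sketch under assumptions not shown in the diff: the script is importable as app, the vitb checkpoint sits at checkpoints/depth_anything_v2_vitb.pth, ffmpeg is on the PATH, and sample.mp4 is a placeholder for any short local clip. process_video only reads the .name attribute of its argument, so a simple namespace object can stand in for the gr.File upload wrapper.

# Hypothetical smoke test (not part of app.py): call the new pipeline directly.
# Assumptions: app.py is in the working directory, the checkpoint and ffmpeg are installed,
# and sample.mp4 is any short local clip you provide.
from types import SimpleNamespace

import app  # importing loads the model; the UI launch stays behind the __main__ guard

fake_upload = SimpleNamespace(name="sample.mp4")  # mimics the object gr.File passes in
preview_frames, depth_video_path = app.process_video(fake_upload)

print("Preview frames:", len(preview_frames))
print("Depth video written to:", depth_video_path)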