# app.py
import os
import shutil
import subprocess
import cv2
import numpy as np
import torch
from PIL import Image
import gradio as gr
from depth_anything_v2.dpt import DepthAnythingV2
# -------------------
# Configuration
# -------------------
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
CHECKPOINT = "checkpoints/depth_anything_v2_vitb.pth"  # vitb only
WORKDIR = "workspace"
FRAMES_DIR = os.path.join(WORKDIR, "frames")
OUT_FRAMES_DIR = os.path.join(WORKDIR, "depth_frames")
RAW_FRAMES_DIR = os.path.join(WORKDIR, "raw16")
OUTPUT_DIR = "output"
# Make sure every working directory exists before any processing starts.
for _dir in (FRAMES_DIR, OUT_FRAMES_DIR, RAW_FRAMES_DIR, OUTPUT_DIR):
    os.makedirs(_dir, exist_ok=True)
# -------------------
# Load model (vitb)
# -------------------
# ViT-B configuration: feature width and per-stage output channels must
# match the published checkpoint being loaded below.
model = DepthAnythingV2(
    encoder='vitb',
    features=128,
    out_channels=[96, 192, 384, 768],
)
# Load weights on CPU first, then move the whole model to the target device.
weights = torch.load(CHECKPOINT, map_location="cpu")
model.load_state_dict(weights)
model = model.to(DEVICE).eval()
# -------------------
# Depth functions
# -------------------
def predict_depth(frame_rgb):
    """Run the module-level DepthAnythingV2 model on one RGB frame.

    Returns the model's depth map cast to float32.
    """
    depth = model.infer_image(frame_rgb)
    return depth.astype(np.float32)
def depth_to_gray8(depth):
    """Normalize a float depth map into an 8-bit grayscale image.

    The map's own min/max define the 0..255 range; a (near-)constant map
    yields all zeros to avoid dividing by zero.
    """
    lo = float(depth.min())
    hi = float(depth.max())
    if hi - lo < 1e-8:
        # Degenerate (flat) depth map: nothing meaningful to normalize.
        return np.zeros_like(depth, dtype=np.uint8)
    scaled = (depth - lo) / (hi - lo) * 255.0
    return scaled.astype(np.uint8)
def clear_workspace():
    """Delete the working tree and recreate the empty frame directories."""
    shutil.rmtree(WORKDIR, ignore_errors=True)
    for directory in (FRAMES_DIR, OUT_FRAMES_DIR, RAW_FRAMES_DIR):
        os.makedirs(directory, exist_ok=True)
# -------------------
# Main Processing
# -------------------
def process_video(video_file):
    """Extract frames, run depth inference per frame, and re-encode a depth video.

    Pipeline: copy upload -> ffmpeg frame extraction (lossless PNG) ->
    per-frame depth inference -> save raw 16-bit + normalized 8-bit PNGs ->
    ffmpeg merge of the 8-bit frames into an H.264 MP4.

    Returns:
        (preview_images, out_video): a list of PIL grayscale preview frames
        (roughly 20 samples across the video) and the path to the output MP4.

    Raises:
        RuntimeError: if the video cannot be opened, no frames were
            extracted, or an extracted frame cannot be read back.
        subprocess.CalledProcessError: if either ffmpeg invocation fails.
    """
    clear_workspace()
    # Copy the upload into the workspace so ffmpeg gets a stable path.
    in_path = os.path.join(WORKDIR, "input.mp4")
    shutil.copy(video_file.name, in_path)
    # Read FPS so the output plays at the same speed as the input.
    cap = cv2.VideoCapture(in_path)
    if not cap.isOpened():
        raise RuntimeError("Cannot open uploaded video.")
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0  # fall back when FPS metadata is missing/zero
    cap.release()
    # Extract PNG frames (lossless)
    subprocess.run([
        "ffmpeg", "-y",
        "-i", in_path,
        os.path.join(FRAMES_DIR, "frame_%06d.png")
    ], check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    frame_files = sorted(os.listdir(FRAMES_DIR))
    if not frame_files:
        raise RuntimeError("No frames extracted.")
    preview_images = []
    total = len(frame_files)
    sample_step = max(1, total // 20)  # ~20 evenly spaced preview frames
    # Process frames
    for i, fname in enumerate(frame_files):
        fp = os.path.join(FRAMES_DIR, fname)
        bgr = cv2.imread(fp, cv2.IMREAD_COLOR)
        if bgr is None:
            # Fail loudly here instead of crashing inside cvtColor with a
            # cryptic OpenCV error when a frame is unreadable.
            raise RuntimeError(f"Failed to read extracted frame: {fp}")
        rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
        depth = predict_depth(rgb)
        # Save raw 16-bit PNG.
        # NOTE(review): astype(np.uint16) truncates fractional depth values;
        # if the model's output range is small, most precision is lost —
        # consider scaling to the full uint16 range first. Behavior kept as-is.
        raw16 = depth.astype(np.uint16)
        Image.fromarray(raw16).save(os.path.join(RAW_FRAMES_DIR, fname))
        # Save normalized grayscale preview (per-frame min/max normalization).
        gray8 = depth_to_gray8(depth)
        Image.fromarray(gray8).save(os.path.join(OUT_FRAMES_DIR, fname))
        if i % sample_step == 0:
            preview_images.append(Image.fromarray(gray8))
    # Build the output name from the upload's basename. os.path.splitext is
    # robust to uppercase extensions and to ".mp4" appearing mid-name, where
    # the previous str.replace(".mp4", "_depth.mp4") misfired.
    stem = os.path.splitext(os.path.basename(video_file.name))[0]
    out_video = os.path.join(OUTPUT_DIR, f"{stem}_depth.mp4")
    # Merge the normalized frames back into an H.264 video.
    subprocess.run([
        "ffmpeg", "-y",
        "-framerate", str(fps),
        "-i", os.path.join(OUT_FRAMES_DIR, "frame_%06d.png"),
        "-c:v", "libx264",
        "-pix_fmt", "yuv420p",  # broad player compatibility
        out_video
    ], check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    return preview_images, out_video
# -------------------
# UI
# -------------------
# -------------------
# UI
# -------------------
with gr.Blocks() as demo:
    gr.Markdown("# Depth Anything V2 ")
    gr.Markdown(
        "https://github.com/DepthAnything/Depth-Anything-V2 "
    )
    # Input: a single mp4 upload.
    video_in = gr.File(label="Upload a video (mp4)", file_types=[".mp4"])
    # Outputs: sampled depth previews plus the rendered depth video.
    gallery = gr.Gallery(label="Preview Depth Frames", columns=5, height="auto")
    out_video = gr.Video(label="Depthmap Video Output")
    btn = gr.Button("Render High-Quality Depth Video")
    # Wire the render button to the full processing pipeline.
    btn.click(process_video, inputs=[video_in], outputs=[gallery, out_video])

if __name__ == "__main__":
    demo.queue().launch()