File size: 4,421 Bytes
e6f85b6
2b2c9f7
e6f85b6
 
abea5f8
fdc64d6
cc36089
 
 
 
fdc64d6
e6f85b6
 
 
cc36089
9712ea3
e6f85b6
 
 
 
 
9712ea3
e6f85b6
 
 
 
 
 
 
 
9712ea3
 
 
 
 
e6f85b6
3879970
 
7a15f8c
9712ea3
 
 
3879970
9712ea3
e6f85b6
 
 
 
 
9712ea3
 
e6f85b6
 
 
2ee5061
 
 
e6f85b6
cc36089
e6f85b6
9712ea3
e6f85b6
 
9712ea3
e6f85b6
 
9712ea3
e6f85b6
 
 
9712ea3
e6f85b6
 
 
 
cc36089
2ee5061
9712ea3
 
2ee5061
e6f85b6
 
9712ea3
e6f85b6
 
 
9712ea3
e6f85b6
 
 
9712ea3
e6f85b6
9712ea3
e6f85b6
 
 
 
 
9712ea3
e6f85b6
9712ea3
e6f85b6
9712ea3
e6f85b6
9712ea3
e6f85b6
9712ea3
e6f85b6
 
 
 
9712ea3
 
 
 
 
 
 
2ee5061
 
e6f85b6
2ee5061
 
e6f85b6
9712ea3
2ee5061
e6f85b6
2b2c9f7
e6f85b6
9712ea3
e6f85b6
c2dcb0d
7b3e1ce
d944c3e
7b3e1ce
d944c3e
fdc64d6
9712ea3
 
 
 
 
 
 
fdc64d6
9712ea3
e6f85b6
2b2c9f7
 
1ce0f93
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# app.py
import os
import shutil
import subprocess
import cv2
import numpy as np
import torch
from PIL import Image
import gradio as gr
from depth_anything_v2.dpt import DepthAnythingV2

# -------------------
# Configuration
# -------------------
# Prefer the GPU when torch can see one; otherwise everything runs on the CPU.
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'

CHECKPOINT = "checkpoints/depth_anything_v2_vitb.pth"  # vitb only

# Scratch tree used while a video is being processed.
WORKDIR = "workspace"
FRAMES_DIR = os.path.join(WORKDIR, "frames")            # extracted input frames
OUT_FRAMES_DIR = os.path.join(WORKDIR, "depth_frames")  # 8-bit depth previews
RAW_FRAMES_DIR = os.path.join(WORKDIR, "raw16")         # 16-bit depth PNGs

# Final rendered videos are written here.
OUTPUT_DIR = "output"

# Make sure every directory exists before the first request arrives.
for _directory in (FRAMES_DIR, OUT_FRAMES_DIR, RAW_FRAMES_DIR, OUTPUT_DIR):
    os.makedirs(_directory, exist_ok=True)
del _directory

# -------------------
# Load model (vitb)
# -------------------
# Instantiate the ViT-B variant of the network.
model = DepthAnythingV2(encoder='vitb', features=128, out_channels=[96, 192, 384, 768])

# Read the checkpoint onto the CPU first, then copy the weights into the model.
state_dict = torch.load(CHECKPOINT, map_location="cpu")
model.load_state_dict(state_dict)

# Move the model to the selected device and switch it to evaluation mode.
model = model.to(DEVICE)
model.eval()

# -------------------
# Depth functions
# -------------------
def predict_depth(frame_rgb):
    """Run the depth model on one RGB frame and return a float32 depth map.

    The upstream Depth-Anything-V2 ``infer_image`` takes an OpenCV-style BGR
    image and performs its own BGR->RGB conversion. Handing it an RGB frame
    therefore feeds the network an image with red and blue swapped, so the
    channels are flipped back to BGR here before inference.
    NOTE(review): this relies on the upstream ``dpt.py`` behaviour -- confirm
    the vendored ``depth_anything_v2`` package has not changed it.

    Args:
        frame_rgb: Image array with three channels on the last axis, in RGB
            order.

    Returns:
        The model's prediction converted to a ``np.float32`` array.
    """
    # Reverse the channel axis (RGB -> BGR) and make the result contiguous so
    # OpenCV calls inside the model receive a plain array.
    frame_bgr = np.ascontiguousarray(frame_rgb[:, :, ::-1])
    return model.infer_image(frame_bgr).astype(np.float32)

def depth_to_gray8(depth):
    """Min-max normalise a depth map into an 8-bit grayscale array.

    The smallest value maps to 0 and the largest to 255. When the map is
    (almost) constant there is nothing to stretch, so an all-zero image of
    the same shape is returned instead.
    """
    lowest = float(depth.min())
    highest = float(depth.max())
    span = highest - lowest

    # Guard against dividing by a (near-)zero range.
    if span < 1e-8:
        return np.zeros_like(depth, dtype=np.uint8)

    scaled = (depth - lowest) / span * 255.0
    return scaled.astype(np.uint8)

def clear_workspace():
    """Wipe the scratch workspace and recreate its empty frame directories."""
    # Errors while deleting (e.g. the tree does not exist yet) are ignored.
    shutil.rmtree(WORKDIR, ignore_errors=True)
    for subdir in (FRAMES_DIR, OUT_FRAMES_DIR, RAW_FRAMES_DIR):
        os.makedirs(subdir, exist_ok=True)

# -------------------
# Main Processing
# -------------------
def _run_ffmpeg(args):
    """Run ffmpeg with *args*, keeping its error output attached to failures."""
    subprocess.run(
        ["ffmpeg", "-y", "-hide_banner", "-loglevel", "error", *args],
        check=True,
        stdout=subprocess.DEVNULL,
        # Captured (not discarded) so CalledProcessError.stderr explains failures.
        stderr=subprocess.PIPE,
    )


def _depth_to_gray16(depth):
    """Min-max normalise a depth map to the full 16-bit range (0..65535)."""
    lo, hi = float(depth.min()), float(depth.max())
    if hi - lo < 1e-8:
        return np.zeros(depth.shape, dtype=np.uint16)
    return ((depth - lo) / (hi - lo) * 65535.0).astype(np.uint16)


def process_video(video_file):
    """Extract → Infer each frame → Save → Merge → Return MP4 + preview frames.

    Args:
        video_file: The uploaded video, either as a filesystem path string or
            as an object exposing the path through a ``name`` attribute.

    Returns:
        A ``(preview_images, out_video)`` tuple: a list of PIL images sampled
        from the 8-bit depth frames (roughly 20 across the clip) and the path
        of the encoded depth video inside ``OUTPUT_DIR``.

    Raises:
        RuntimeError: If nothing was uploaded, the video cannot be opened, no
            frames were extracted, or an extracted frame cannot be read.
        subprocess.CalledProcessError: If an ffmpeg invocation fails; its
            ``stderr`` attribute holds ffmpeg's error output.
    """
    if video_file is None:
        raise RuntimeError("No video uploaded.")
    # Accept both a plain path and a temp-file style object with ``.name``.
    src_path = getattr(video_file, "name", video_file)

    clear_workspace()

    # Copy video to workspace
    in_path = os.path.join(WORKDIR, "input.mp4")
    shutil.copy(src_path, in_path)

    # Read FPS; always release the capture, even when opening failed.
    cap = cv2.VideoCapture(in_path)
    try:
        if not cap.isOpened():
            raise RuntimeError("Cannot open uploaded video.")
        fps = cap.get(cv2.CAP_PROP_FPS)
    finally:
        cap.release()
    # Containers without usable timing report 0 (or NaN); fall back to 30 fps.
    if not np.isfinite(fps) or fps <= 0:
        fps = 30.0

    # Extract PNG frames (lossless)
    _run_ffmpeg(["-i", in_path, os.path.join(FRAMES_DIR, "frame_%06d.png")])

    frame_files = sorted(os.listdir(FRAMES_DIR))
    if not frame_files:
        raise RuntimeError("No frames extracted.")

    preview_images = []
    # Keep about 20 evenly spaced frames for the gallery.
    sample_step = max(1, len(frame_files) // 20)

    # Process frames
    for i, fname in enumerate(frame_files):
        bgr = cv2.imread(os.path.join(FRAMES_DIR, fname), cv2.IMREAD_COLOR)
        if bgr is None:
            raise RuntimeError(f"Cannot read extracted frame {fname}.")
        rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

        depth = predict_depth(rgb)

        # Save 16-bit PNG. The float depth is stretched to 0..65535 first:
        # casting it directly to uint16 would truncate it to a handful of
        # integer levels and throw away nearly all of the precision.
        Image.fromarray(_depth_to_gray16(depth)).save(
            os.path.join(RAW_FRAMES_DIR, fname)
        )

        # Save normalized grayscale preview
        gray_image = Image.fromarray(depth_to_gray8(depth))
        gray_image.save(os.path.join(OUT_FRAMES_DIR, fname))

        if i % sample_step == 0:
            preview_images.append(gray_image)

    # Merge video using ffmpeg. The output name is derived from the upload's
    # stem so any extension spelling (e.g. ".MP4") yields "<stem>_depth.mp4".
    stem = os.path.splitext(os.path.basename(src_path))[0]
    out_video = os.path.join(OUTPUT_DIR, f"{stem}_depth.mp4")

    _run_ffmpeg([
        "-framerate", str(fps),
        "-i", os.path.join(OUT_FRAMES_DIR, "frame_%06d.png"),
        # yuv420p needs even dimensions; pad odd sizes up by one pixel.
        "-vf", "pad=ceil(iw/2)*2:ceil(ih/2)*2",
        "-c:v", "libx264",
        "-pix_fmt", "yuv420p",
        out_video,
    ])

    return preview_images, out_video

# -------------------
# UI
# -------------------
# Build the Gradio page: a file upload as input, a preview gallery and the
# rendered depth video as outputs. Components appear in the order they are
# created inside the Blocks context.
with gr.Blocks() as demo:
    gr.Markdown("# Depth Anything V2 ")
    gr.Markdown(
        "https://github.com/DepthAnything/Depth-Anything-V2 "
    )

    # Upload widget; the file picker is restricted to .mp4 files.
    video_in = gr.File(label="Upload a video (mp4)", file_types=[".mp4"])
    # Shows the sampled depth frames returned by process_video.
    gallery = gr.Gallery(
        label="Preview Depth Frames",
        columns=5,
        height="auto"
    )
    # Plays the encoded depth video whose path process_video returns.
    out_video = gr.Video(label="Depthmap Video Output")

    btn = gr.Button("Render High-Quality Depth Video")
    # process_video returns (preview_images, out_video); the two values are
    # routed, in that order, to the gallery and the video player.
    btn.click(process_video, inputs=[video_in], outputs=[gallery, out_video])

# Enable Gradio's request queue and start the app when run as a script.
if __name__ == "__main__":
    demo.queue().launch()