niye4 committed on
Commit
2ee5061
·
verified Β·
1 Parent(s): fa50454

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +92 -60
app.py CHANGED
@@ -4,6 +4,9 @@ import numpy as np
4
  import torch
5
  from PIL import Image
6
  import gradio as gr
 
 
 
7
  from depth_anything_v2.dpt import DepthAnythingV2
8
 
9
  # ===============================
@@ -13,6 +16,7 @@ DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
13
 
14
  MODEL_PATH = "checkpoints/depth_anything_v2_vitb.pth"
15
  model = DepthAnythingV2(encoder='vitb', features=128, out_channels=[96,192,384,768])
 
16
  state_dict = torch.load(MODEL_PATH, map_location="cpu")
17
  model.load_state_dict(state_dict)
18
  model = model.to(DEVICE).eval()
@@ -21,90 +25,118 @@ model = model.to(DEVICE).eval()
21
  # Predict depth for single frame
22
  # ===============================
23
  def predict_depth(frame_rgb):
24
- """Return depth map as float32"""
25
  depth = model.infer_image(frame_rgb)
26
  return depth.astype(np.float32)
27
 
28
  # ===============================
29
- # Normalize to 0-255 grayscale
30
  # ===============================
31
  def depth_to_grayscale(depth):
32
- norm = (depth - depth.min()) / (depth.max() - depth.min() + 1e-8)
33
- gray = (norm * 255).astype(np.uint8)
34
- return gray
35
 
36
  # ===============================
37
- # Process video
38
  # ===============================
39
  def process_video(video_file):
40
- """
41
- Render grayscale depthmap video.
42
- Keep original resolution & FPS.
43
- """
 
 
 
 
44
  OUTPUT_DIR = "output"
45
- os.makedirs(OUTPUT_DIR, exist_ok=True)
46
 
47
- video_path = os.path.join(OUTPUT_DIR, os.path.basename(video_file.name))
48
- # Copy input video
49
- import shutil
50
- shutil.copy(video_file.name, video_path)
51
 
52
- cap = cv2.VideoCapture(video_path)
53
- if not cap.isOpened() or int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) == 0:
54
- raise RuntimeError("Cannot open video or empty video file.")
55
 
 
 
 
 
56
  fps = cap.get(cv2.CAP_PROP_FPS)
57
- width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
58
- height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
59
-
60
- # Video output path
61
- output_video_path = os.path.join(OUTPUT_DIR, os.path.basename(video_path).replace(".mp4","_depth.mp4"))
62
- fourcc = cv2.VideoWriter_fourcc(*'mp4v')
63
- out = cv2.VideoWriter(output_video_path, fourcc, fps, (width,height), isColor=False)
64
-
65
- # Slider preview (sample frames)
66
- slider_frames = []
67
- max_slider_frames = 30
68
- total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
69
- step = max(1, total_frames // max_slider_frames)
70
- idx = 0
71
-
72
- while True:
73
- ret, frame = cap.read()
74
- if not ret:
75
- break
76
-
77
- frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
78
- depth_map = predict_depth(frame_rgb)
79
- gray_frame = depth_to_grayscale(depth_map)
80
- out.write(gray_frame)
81
-
82
- # Add sampled frames for slider preview
83
- if idx % step == 0:
84
- slider_frames.append(Image.fromarray(gray_frame))
85
- idx += 1
86
-
87
  cap.release()
88
- out.release()
89
- return slider_frames, output_video_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
 
91
  # ===============================
92
- # Gradio Interface
93
  # ===============================
94
  with gr.Blocks() as demo:
95
- gr.Markdown("# Depth Anything V2 – Grayscale Depth Video (vitb)")
 
96
  gr.Markdown(
97
- "Upload an MP4 video to generate a **grayscale DepthMap video**.\n\n"
98
- "**Model:** vitb – fast and high quality for video processing.\n"
99
- "Resolution and FPS are preserved from the original video."
 
 
100
  )
101
 
102
- video_input = gr.File(label="Upload MP4", file_types=['.mp4'])
103
- depth_slider = gr.Gallery(label="DepthMap Slider Preview", elem_id="depth_slider")
104
- video_output = gr.Video(label="DepthMap Video")
105
- submit = gr.Button("Render DepthMap")
 
 
 
106
 
107
- submit.click(fn=process_video, inputs=[video_input], outputs=[depth_slider, video_output])
108
 
109
  if __name__ == "__main__":
110
  demo.queue().launch()
 
4
  import torch
5
  from PIL import Image
6
  import gradio as gr
7
+ import subprocess
8
+ import shutil
9
+
10
  from depth_anything_v2.dpt import DepthAnythingV2
11
 
12
  # ===============================
 
16
 
17
# Load the ViT-B Depth Anything V2 checkpoint once at startup.
MODEL_PATH = "checkpoints/depth_anything_v2_vitb.pth"
model = DepthAnythingV2(encoder='vitb', features=128, out_channels=[96,192,384,768])

# The checkpoint is a plain tensor state dict, so load with
# weights_only=True: plain torch.load unpickles arbitrary Python objects
# from the file, which is unsafe and warned against in modern torch.
state_dict = torch.load(MODEL_PATH, map_location="cpu", weights_only=True)
model.load_state_dict(state_dict)
model = model.to(DEVICE).eval()
 
25
  # Predict depth for single frame
26
  # ===============================
27
def predict_depth(frame_rgb):
    """Run the depth model on one RGB frame and return a float32 depth map."""
    return model.infer_image(frame_rgb).astype(np.float32)
30
 
31
  # ===============================
32
+ # Normalize grayscale (0–255)
33
  # ===============================
34
def depth_to_grayscale(depth):
    """Scale a float depth map into an 8-bit grayscale image (0-255).

    Min-max normalizes ``depth``; the tiny epsilon keeps a constant-valued
    map (zero range) from dividing by zero, yielding an all-black frame.
    """
    lo = depth.min()
    span = depth.max() - lo
    scaled = (depth - lo) / (span + 1e-8)
    return (scaled * 255).astype(np.uint8)
38
 
39
  # ===============================
40
+ # Main video processing (FFMPEG)
41
  # ===============================
42
def process_video(video_file):
    """Render a grayscale depth-map video for an uploaded MP4.

    Frames are extracted with ffmpeg, each frame is run through the depth
    model, and the grayscale depth frames are re-encoded into an H.264 MP4
    at the source frame rate.

    Args:
        video_file: Gradio file object exposing the uploaded path as ``.name``.

    Returns:
        tuple: (slider_preview, output_video) — a list of PIL preview images
        (at most ~20) and the path of the rendered depth video.

    Raises:
        RuntimeError: if the video is unreadable or yields no frames.
        subprocess.CalledProcessError: if either ffmpeg invocation fails.
    """
    # -----------------------------
    # Setup directories (fresh workspace per run)
    # -----------------------------
    INPUT_PATH = video_file.name
    WORKDIR = "workspace"
    FRAMES_DIR = "workspace/frames"
    OUT_FRAMES_DIR = "workspace/depth_frames"
    OUTPUT_DIR = "output"

    shutil.rmtree(WORKDIR, ignore_errors=True)
    os.makedirs(FRAMES_DIR, exist_ok=True)
    os.makedirs(OUT_FRAMES_DIR, exist_ok=True)
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # Copy the upload into the workspace so ffmpeg sees a stable path.
    input_copy = os.path.join(WORKDIR, "input.mp4")
    shutil.copy(INPUT_PATH, input_copy)

    # -----------------------------
    # Extract FPS (needed to rebuild at the original frame rate)
    # -----------------------------
    cap = cv2.VideoCapture(input_copy)
    fps = cap.get(cv2.CAP_PROP_FPS)
    cap.release()
    if fps <= 0:
        # BUGFIX: an unreadable/broken video previously produced fps == 0,
        # which silently generated an invalid "-framerate 0" ffmpeg call.
        raise RuntimeError("Cannot open video or read its frame rate.")

    # -----------------------------
    # FFMPEG: extract all frames as PNGs
    # -----------------------------
    extract_cmd = [
        "ffmpeg", "-y",
        "-i", input_copy,
        f"{FRAMES_DIR}/frame_%06d.png",
    ]
    # check=True: fail loudly instead of depth-rendering an empty directory.
    subprocess.run(extract_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)

    # -----------------------------
    # Process each frame -> depth
    # -----------------------------
    frames = sorted(os.listdir(FRAMES_DIR))
    if not frames:
        raise RuntimeError("ffmpeg extracted no frames from the video.")

    slider_preview = []
    preview_step = max(1, len(frames) // 20)  # cap the slider preview at ~20 images

    for idx, fname in enumerate(frames):
        img = cv2.imread(os.path.join(FRAMES_DIR, fname))
        if img is None:
            # Skip an unreadable frame instead of crashing in cvtColor.
            continue
        rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        gray = depth_to_grayscale(predict_depth(rgb))
        Image.fromarray(gray).save(os.path.join(OUT_FRAMES_DIR, fname))

        if idx % preview_step == 0:
            slider_preview.append(Image.fromarray(gray))

    # -----------------------------
    # FFMPEG: re-encode depth frames into an MP4
    # -----------------------------
    output_video = os.path.join(OUTPUT_DIR, "depth_output.mp4")
    merge_cmd = [
        "ffmpeg", "-y",
        "-framerate", str(fps),
        "-i", f"{OUT_FRAMES_DIR}/frame_%06d.png",
        "-c:v", "libx264",
        "-pix_fmt", "yuv420p",  # widest player compatibility for grayscale input
        output_video,
    ]
    subprocess.run(merge_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)

    return slider_preview, output_video
116
+
117
 
118
  # ===============================
119
+ # Gradio UI
120
  # ===============================
121
# Build the Gradio interface: one upload input, a preview gallery, and the
# rendered depth video output, wired to process_video via a button click.
with gr.Blocks() as demo:

    gr.Markdown("# Depth Anything V2 – High-Quality Grayscale Depth Video (vitb)")
    gr.Markdown(
        "This version uses **FFmpeg frame extraction** for the best possible quality.\n\n"
        "- ✔ Preserves original resolution & FPS\n"
        "- ✔ Frame-by-frame depth rendering (sharpest result)\n"
        "- ✔ Model: **vitb** (fast & clean)\n"
        "- ✔ Output: grayscale depth video"
    )

    video_input = gr.File(label="Upload MP4 Video", file_types=[".mp4"])
    depth_slider = gr.Gallery(label="Preview")
    output_video = gr.Video(label="Depth Video Output")
    render_btn = gr.Button("Render DepthMap")

    render_btn.click(process_video, inputs=[video_input], outputs=[depth_slider, output_video])


# Queue requests so long renders don't block the server.
if __name__ == "__main__":
    demo.queue().launch()