verymehari commited on
Commit
f8cbdb8
·
verified ·
1 Parent(s): b249e40

Upload 3 files

Browse files
Files changed (3) hide show
  1. README.md +7 -30
  2. app.py +89 -220
  3. requirements.txt +7 -6
README.md CHANGED
@@ -1,39 +1,16 @@
1
  ---
2
- title: DepthShift · Depth Generator
3
- emoji: 📐
4
  colorFrom: gray
5
- colorTo: green
6
  sdk: gradio
7
- sdk_version: "4.0.0"
8
  app_file: app.py
9
  pinned: false
10
- license: apache-2.0
11
  ---
12
 
13
- # DepthShift · Depth Map Generator
14
 
15
- Part of the [Spatial Index](https://spatial-index.vercel.app) pipeline — **scene_03 · DEPTHSHIFT**.
16
 
17
- Upload any MP4 video and get back a grayscale depth map video powered by **Depth Anything V2**, ready to drop straight into [DepthShift](https://project-depthshift.vercel.app) for 3D particle field visualization in WebXR.
18
-
19
- ## How it works
20
-
21
- 1. Upload your video
22
- 2. Adjust FPS and frame count
23
- 3. Hit **GENERATE DEPTH**
24
- 4. Download the depth video or PNG
25
- 5. Open DepthShift and upload both files → instant 3D point cloud
26
-
27
- ## Stack
28
-
29
- - **Depth Anything V2 Small** — fast, accurate monocular depth estimation
30
- - **Gradio + ZeroGPU** — free GPU inference on HuggingFace
31
- - **OpenCV** — frame extraction and video encoding
32
-
33
- ## Output formats
34
-
35
- | File | Use |
36
- |------|-----|
37
- | `depth.mp4` | Grayscale depth video → feed into DepthShift |
38
- | `sbs.mp4` | Side-by-side preview (original + Inferno colormap) |
39
- | `depth_frame0.png` | First frame depth PNG → for static DepthShift tests |
 
1
  ---
2
+ title: DepthShift Depth Generator
3
+ emoji: 📐
4
  colorFrom: gray
5
+ colorTo: yellow
6
  sdk: gradio
7
+ sdk_version: 3.50.2
8
  app_file: app.py
9
  pinned: false
 
10
  ---
11
 
12
+ # DepthShift Depth Map Generator
13
 
14
+ Upload any MP4 and get a grayscale depth map video back, ready to load into [Spatial Index / DepthShift](https://spatial-index.vercel.app).
15
 
16
+ Powered by [Depth Anything V2 Small](https://huggingface.co/depth-anything/Depth-Anything-V2-Small-hf). Runs on CPU — keep frames ≤ 30 for reasonable speed.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py CHANGED
@@ -2,276 +2,145 @@ import gradio as gr
2
  import torch
3
  import numpy as np
4
  import cv2
5
- import tempfile
6
- import os
7
  from PIL import Image
8
  from transformers import AutoImageProcessor, AutoModelForDepthEstimation
 
 
9
 
10
- # ── Model Small + half-res inference = tolerable on CPU ────────────────────
11
- MODEL_ID = "depth-anything/Depth-Anything-V2-Small-hf"
12
- print(f"Loading {MODEL_ID} on CPU…")
13
- processor = AutoImageProcessor.from_pretrained(MODEL_ID)
14
- model = AutoModelForDepthEstimation.from_pretrained(MODEL_ID)
15
  model.eval()
16
- print("Model ready.")
17
 
18
- INFER_SIZE = 256 # shrink frames before inference — big speed win on CPU
19
 
20
-
21
- # ── Inference ─────────────────────────────────────────────────────────────────
22
- def depth_frame(frame_bgr: np.ndarray, orig_w: int, orig_h: int) -> np.ndarray:
23
- """Run depth on a downscaled frame, upsample result back to original size."""
24
- small = cv2.resize(frame_bgr, (INFER_SIZE, INFER_SIZE))
25
- image = Image.fromarray(cv2.cvtColor(small, cv2.COLOR_BGR2RGB))
26
- inputs = processor(images=image, return_tensors="pt")
27
  with torch.no_grad():
28
  outputs = model(**inputs)
29
- post = processor.post_process_depth_estimation(
30
- outputs, target_sizes=[(INFER_SIZE, INFER_SIZE)]
31
- )
32
- depth = post[0]["predicted_depth"].numpy()
33
- depth_norm = (depth - depth.min()) / (depth.max() - depth.min() + 1e-8)
34
- depth_small = (depth_norm * 255).astype(np.uint8)
35
- # Upsample back to original resolution
36
- return cv2.resize(depth_small, (orig_w, orig_h), interpolation=cv2.INTER_LINEAR)
37
 
38
 
39
- def process_video(video_path: str, fps_out: int, max_frames: int, progress=gr.Progress()):
40
  if video_path is None:
41
- raise gr.Error("Please upload a video first.")
42
 
43
  cap = cv2.VideoCapture(video_path)
44
- total_src = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
 
 
 
 
45
  w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
46
  h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
47
 
48
- if total_src == 0 or w == 0 or h == 0:
49
- raise gr.Error("Could not read video. Please upload a valid MP4.")
50
-
51
- step = max(1, total_src // max_frames)
52
- frame_indices = list(range(0, total_src, step))[:max_frames]
53
 
 
54
  tmp_dir = tempfile.mkdtemp()
55
  depth_path = os.path.join(tmp_dir, "depth.mp4")
56
- sbs_path = os.path.join(tmp_dir, "sbs.mp4")
57
- png_path = os.path.join(tmp_dir, "depth_frame0.png")
58
 
59
  fourcc = cv2.VideoWriter_fourcc(*"mp4v")
60
  depth_writer = cv2.VideoWriter(depth_path, fourcc, fps_out, (w, h), isColor=False)
61
- sbs_writer = cv2.VideoWriter(sbs_path, fourcc, fps_out, (w * 2, h))
62
 
63
- first_saved = False
64
- n = len(frame_indices)
65
 
66
  for i, idx in enumerate(frame_indices):
67
- progress(i / n, desc=f"Frame {i+1}/{n} — this takes ~1–2s per frame on CPU")
68
  cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
69
- ret, frame = cap.read()
70
  if not ret:
71
  continue
72
 
73
- depth_gray = depth_frame(frame, w, h)
74
- depth_writer.write(depth_gray)
75
- depth_color = cv2.applyColorMap(depth_gray, cv2.COLORMAP_INFERNO)
76
- sbs_writer.write(np.hstack([frame, depth_color]))
77
-
78
- if not first_saved:
79
- cv2.imwrite(png_path, depth_gray)
80
- first_saved = True
81
-
82
- cap.release()
83
- depth_writer.release()
84
- sbs_writer.release()
85
-
86
- return depth_path, sbs_path, png_path
87
 
 
 
88
 
89
- # ── UI ────────────────────────────────────────────────────────────────────────
90
- CSS = """
91
- @import url('https://fonts.googleapis.com/css2?family=Space+Mono:ital,wght@0,400;0,700;1,400&family=Syne:wght@400;700;800&display=swap');
92
 
93
- :root {
94
- --bg: #080808;
95
- --surface: #0f0f0f;
96
- --border: #1e1e1e;
97
- --accent: #c8ff00;
98
- --dim: #444;
99
- --text: #e8e8e8;
100
- --mono: 'Space Mono', monospace;
101
- --sans: 'Syne', sans-serif;
102
- }
103
 
104
- body, .gradio-container {
105
- background: var(--bg) !important;
106
- color: var(--text) !important;
107
- font-family: var(--mono) !important;
108
- }
109
-
110
- #header {
111
- border-bottom: 1px solid var(--border);
112
- padding: 2rem 0 1.5rem;
113
- margin-bottom: 2rem;
114
- }
115
- #header h1 {
116
- font-family: var(--sans) !important;
117
- font-weight: 800 !important;
118
- font-size: 2rem !important;
119
- letter-spacing: -0.02em;
120
- color: var(--text) !important;
121
- margin: 0 0 0.25rem !important;
122
- }
123
- #header p {
124
- font-size: 0.72rem;
125
- color: var(--dim);
126
- letter-spacing: 0.12em;
127
- text-transform: uppercase;
128
- margin: 0;
129
- }
130
- #live-dot {
131
- display: inline-block;
132
- width: 6px; height: 6px;
133
- background: var(--accent);
134
- border-radius: 50%;
135
- margin-right: 6px;
136
- animation: pulse 2s ease-in-out infinite;
137
- }
138
- @keyframes pulse {
139
- 0%, 100% { opacity: 1; }
140
- 50% { opacity: 0.3; }
141
- }
142
-
143
- .panel {
144
- background: var(--surface) !important;
145
- border: 1px solid var(--border) !important;
146
- border-radius: 2px !important;
147
- padding: 1.25rem !important;
148
- }
149
-
150
- label span, .label-wrap span {
151
- font-family: var(--mono) !important;
152
- font-size: 0.68rem !important;
153
- letter-spacing: 0.1em !important;
154
- text-transform: uppercase !important;
155
- color: var(--dim) !important;
156
- }
157
 
158
- input[type=range] { accent-color: var(--accent); }
 
 
 
 
159
 
160
- #run-btn {
161
- background: var(--accent) !important;
162
- color: #000 !important;
163
- font-family: var(--mono) !important;
164
- font-weight: 700 !important;
165
- font-size: 0.75rem !important;
166
- letter-spacing: 0.15em !important;
167
- text-transform: uppercase !important;
168
- border: none !important;
169
- border-radius: 1px !important;
170
- padding: 0.85rem 2rem !important;
171
- cursor: pointer;
172
- transition: opacity 0.15s;
173
- width: 100%;
174
- }
175
- #run-btn:hover { opacity: 0.85; }
176
 
177
- .timing-note {
178
- margin-top: 1.5rem;
179
- padding: 1rem;
180
- border: 1px solid #1e1e1e;
181
- font-size: 0.65rem;
182
- line-height: 2;
183
- color: #444;
184
- letter-spacing: 0.05em;
185
- }
186
 
187
- .next-steps {
188
- margin-top: 1rem;
189
- padding: 0.75rem 1rem;
190
- background: #0a0a0a;
191
- border: 1px solid #1a1a1a;
192
- font-size: 0.65rem;
193
- color: #555;
194
- letter-spacing: 0.05em;
195
- line-height: 2.2;
196
- }
197
 
198
- #footer-note {
199
- font-size: 0.65rem;
200
- color: var(--dim);
201
- letter-spacing: 0.08em;
202
- text-transform: uppercase;
203
- border-top: 1px solid var(--border);
204
- padding-top: 1.5rem;
205
- margin-top: 2rem;
206
- }
207
- #footer-note a { color: var(--accent); text-decoration: none; }
208
  """
209
 
210
- with gr.Blocks(css=CSS, title="DepthShift · Depth Generator") as demo:
211
-
212
  gr.HTML("""
213
- <div id="header">
214
- <h1><span id="live-dot"></span>DepthShift</h1>
215
- <p>scene_03 · Depth Map Generator · Depth Anything V2 · CPU · Free</p>
216
- </div>
 
 
217
  """)
218
 
219
  with gr.Row():
220
- with gr.Column(scale=1, elem_classes="panel"):
221
- gr.HTML('<div style="padding:0 0 0.5rem; font-size:0.68rem; letter-spacing:0.1em; text-transform:uppercase; color:#444;">01 · Upload</div>')
222
- video_in = gr.Video(label="Source Video (MP4)", sources=["upload"])
223
-
224
- gr.HTML('<div style="padding:1rem 0 0.5rem; font-size:0.68rem; letter-spacing:0.1em; text-transform:uppercase; color:#444;">02 · Parameters</div>')
225
- fps_slider = gr.Slider(minimum=6, maximum=24, value=8, step=1, label="Output FPS")
226
- frames_slider = gr.Slider(minimum=10, maximum=60, value=30, step=5,
227
- label="Max Frames (keep low on CPU)")
228
-
229
- gr.HTML('<div style="padding:0.75rem 0"></div>')
230
- run_btn = gr.Button("GENERATE DEPTH →", elem_id="run-btn")
231
-
232
- gr.HTML("""
233
- <div class="timing-note">
234
- MODEL · Depth Anything V2 Small<br>
235
- BACKEND · CPU · <b style="color:#c8ff00">Free forever</b><br>
236
- SPEED · ~1–2s per frame<br>
237
- 30 frames ≈ 1–2 min total<br>
238
- TIP · Keep max frames ≤ 30 for fast results
239
- </div>
240
- """)
241
-
242
- with gr.Column(scale=2, elem_classes="panel"):
243
- gr.HTML('<div style="padding:0 0 0.5rem; font-size:0.68rem; letter-spacing:0.1em; text-transform:uppercase; color:#444;">03 · Outputs</div>')
244
-
245
- with gr.Tabs():
246
- with gr.Tab("Side-by-Side Preview"):
247
- sbs_out = gr.Video(label="Original · Depth (Inferno)", autoplay=True)
248
- with gr.Tab("Depth Map (Grayscale)"):
249
- depth_out = gr.Video(label="Depth Video — feed into DepthShift", autoplay=True)
250
- with gr.Tab("Frame PNG"):
251
- png_out = gr.Image(label="First Frame Depth PNG")
252
-
253
- gr.HTML("""
254
- <div class="next-steps">
255
- ① Download the <b style="color:#c8ff00">Depth Video</b> or <b style="color:#c8ff00">Frame PNG</b><br>
256
- ② Open <a href="https://project-depthshift.vercel.app" target="_blank"
257
- style="color:#c8ff00">project-depthshift.vercel.app</a><br>
258
- ③ Upload source video + depth map → 3D particle field in WebXR
259
- </div>
260
- """)
261
 
262
  gr.HTML("""
263
- <div id="footer-note">
264
- Built for <a href="https://spatial-index.vercel.app">Spatial Index</a> ·
265
- Model: <a href="https://huggingface.co/depth-anything/Depth-Anything-V2-Small-hf">Depth Anything V2 Small</a> ·
266
- Apache-2.0 · $0.00
267
- </div>
 
 
268
  """)
269
 
270
  run_btn.click(
271
  fn=process_video,
272
  inputs=[video_in, fps_slider, frames_slider],
273
- outputs=[depth_out, sbs_out, png_out],
274
  )
275
 
276
- if __name__ == "__main__":
277
- demo.launch()
 
2
  import torch
3
  import numpy as np
4
  import cv2
 
 
5
  from PIL import Image
6
  from transformers import AutoImageProcessor, AutoModelForDepthEstimation
7
+ import tempfile
8
+ import os
9
 
10
# ── Model setup ──────────────────────────────────────────────────────────────
# Loaded once at import time; inference runs on CPU (no .to("cuda") anywhere).
_MODEL_ID = "depth-anything/Depth-Anything-V2-Small-hf"
print("Loading Depth Anything V2 Small...")
processor = AutoImageProcessor.from_pretrained(_MODEL_ID)
model = AutoModelForDepthEstimation.from_pretrained(_MODEL_ID)
model.eval()
print("Model loaded.")
16
 
 
17
 
18
def estimate_depth(frame_rgb: np.ndarray) -> np.ndarray:
    """Run depth estimation on a single RGB frame, return normalised uint8 depth.

    The frame is downscaled to 256 px wide before inference (big speed win on
    CPU), the model's predicted depth is min-max normalised to 0–255, and the
    result is upsampled back to the source resolution so it matches the
    dimensions the video writers were opened with.

    Args:
        frame_rgb: HxWx3 uint8 RGB frame.

    Returns:
        HxW uint8 depth map. (Presumably brighter = closer per Depth Anything's
        convention — TODO confirm against the model card.)
    """
    h, w = frame_rgb.shape[:2]
    # Downscale for speed on CPU. Clamp the height to >= 1: for an extremely
    # wide frame int(256 * h / w) truncates to 0 and cv2.resize would raise.
    small_h = max(1, int(256 * h / w))
    small = cv2.resize(frame_rgb, (256, small_h))
    pil_img = Image.fromarray(small)
    inputs = processor(images=pil_img, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    depth = outputs.predicted_depth.squeeze().numpy()
    # Min-max normalise to the full uint8 range (a constant map becomes all 0).
    depth_norm = cv2.normalize(depth, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
    # Upsample back to the original resolution.
    depth_full = cv2.resize(depth_norm, (w, h), interpolation=cv2.INTER_LINEAR)
    return depth_full
 
 
 
 
31
 
32
 
33
def process_video(video_path, fps_out, max_frames, progress=gr.Progress()):
    """Generate a grayscale depth video plus a side-by-side preview from an MP4.

    Args:
        video_path: path to the uploaded video, or None when nothing uploaded.
        fps_out: frame rate of the generated output videos.
        max_frames: upper bound on the number of frames processed.
        progress: Gradio progress callback (injected by the UI).

    Returns:
        (depth_video_path, preview_video_path, first_frame_png_path), or
        (None, None, None) when no video was supplied.

    Raises:
        gr.Error: if the file cannot be opened or decoded.
    """
    if video_path is None:
        return None, None, None

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise gr.Error("Could not open video file.")

    src_fps = cap.get(cv2.CAP_PROP_FPS) or 24
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Guard against undecodable files: a 0x0 frame size or empty stream would
    # make VideoWriter silently emit broken output instead of failing here.
    if total_frames == 0 or w == 0 or h == 0:
        cap.release()
        raise gr.Error("Could not read video. Please upload a valid MP4.")

    # Sample every N source frames so the output approximates fps_out.
    step = max(1, int(src_fps / fps_out))
    frame_indices = list(range(0, min(total_frames, max_frames * step), step))[:max_frames]

    # Output paths
    tmp_dir = tempfile.mkdtemp()
    depth_path = os.path.join(tmp_dir, "depth.mp4")
    preview_path = os.path.join(tmp_dir, "preview.mp4")

    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    depth_writer = cv2.VideoWriter(depth_path, fourcc, fps_out, (w, h), isColor=False)
    preview_writer = cv2.VideoWriter(preview_path, fourcc, fps_out, (w * 2, h))

    first_depth_frame = None
    try:
        for i, idx in enumerate(frame_indices):
            progress(i / len(frame_indices), desc=f"Processing frame {i+1}/{len(frame_indices)}")
            cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
            ret, frame_bgr = cap.read()
            if not ret:
                continue

            frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
            depth = estimate_depth(frame_rgb)

            # Keep the first depth frame for the downloadable PNG.
            if first_depth_frame is None:
                first_depth_frame = depth

            depth_writer.write(depth)

            # Side-by-side preview: original | depth colourised
            depth_color = cv2.applyColorMap(depth, cv2.COLORMAP_INFERNO)
            depth_color_rgb = cv2.cvtColor(depth_color, cv2.COLOR_BGR2RGB)
            side = np.concatenate([frame_rgb, depth_color_rgb], axis=1)
            preview_writer.write(cv2.cvtColor(side, cv2.COLOR_RGB2BGR))
    finally:
        # Always release handles — even if inference raises mid-loop — so the
        # partially written files are flushed and the capture is closed.
        cap.release()
        depth_writer.release()
        preview_writer.release()

    first_frame_png = None
    if first_depth_frame is not None:
        png_path = os.path.join(tmp_dir, "first_frame.png")
        Image.fromarray(first_depth_frame).save(png_path)
        first_frame_png = png_path

    return depth_path, preview_path, first_frame_png
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
 
 
 
 
 
 
 
 
 
 
94
 
95
# ── UI ──────────────────────────────────────────────────────────────────────

css = """
body { background: #0a0a0a; color: #e0e0e0; font-family: 'Space Mono', monospace; }
.gradio-container { max-width: 900px; margin: 0 auto; }
h1 { color: #c8ff00; letter-spacing: 0.08em; font-size: 1.6rem; }
.label { color: #888; font-size: 0.75rem; letter-spacing: 0.1em; text-transform: uppercase; }
button.primary { background: #c8ff00 !important; color: #0a0a0a !important; font-weight: 700; border-radius: 2px !important; }
button.primary:hover { background: #b0e000 !important; }
.footer { color: #444; font-size: 0.7rem; text-align: center; margin-top: 2rem; }
"""

with gr.Blocks(css=css, title="DepthShift Depth Map Generator") as demo:
    # Header: load the mono font and show a one-line pitch.
    gr.HTML("""
    <link href="https://fonts.googleapis.com/css2?family=Space+Mono:wght@400;700&display=swap" rel="stylesheet">
    <h1> DEPTHSHIFT / DEPTH GENERATOR</h1>
    <p style="color:#888; font-size:0.85rem; margin-top:-0.5rem;">
    Upload an MP4 → get a grayscale depth map video ready for
    <a href="https://spatial-index.vercel.app" target="_blank" style="color:#c8ff00;">Spatial Index</a>
    </p>
    """)

    with gr.Row():
        # Left column: input video + parameters + trigger.
        with gr.Column():
            video_in = gr.Video(label="Input Video (MP4)", interactive=True)
            with gr.Row():
                fps_slider = gr.Slider(6, 24, value=12, step=1, label="Output FPS (keep low for CPU speed)")
                frames_slider = gr.Slider(10, 60, value=30, step=5, label="Max Frames")
            run_btn = gr.Button("Generate Depth Map", variant="primary")

        # Right column: the three outputs returned by process_video.
        with gr.Column():
            depth_out = gr.Video(label="Depth Map (grayscale) — use this in DepthShift")
            preview_out = gr.Video(label="Preview (original | depth side-by-side)")
            frame_out = gr.Image(label="First Frame Depth PNG")

    gr.HTML("""
    <div class="footer">
    <b style="color:#c8ff00">HOW TO USE</b><br>
    1. Upload your MP4 &nbsp;→&nbsp;
    2. Download the depth map video &nbsp;→&nbsp;
    3. Load both into <a href="https://spatial-index.vercel.app" style="color:#c8ff00">Spatial Index / DepthShift</a>
    <br><br>Processing runs on CPU — keep Max Frames ≤ 30 for reasonable wait times (~1–2 min).
    </div>
    """)

    run_btn.click(
        fn=process_video,
        inputs=[video_in, fps_slider, frames_slider],
        outputs=[depth_out, preview_out, frame_out],
    )

# Launch only when executed directly (HF Spaces runs app.py as __main__), so
# importing this module no longer starts the server as a side effect.
if __name__ == "__main__":
    demo.launch()
 
requirements.txt CHANGED
@@ -1,6 +1,7 @@
1
- gradio>=4.0.0
2
- torch>=2.0.0
3
- transformers>=4.38.0
4
- opencv-python-headless>=4.8.0
5
- Pillow>=10.0.0
6
- numpy>=1.24.0
 
 
1
+ gradio==3.50.2
2
+ torch==2.1.0
3
+ torchvision==0.16.0
4
+ transformers==4.40.0
5
+ opencv-python-headless==4.9.0.80
6
+ numpy==1.26.4
7
+ Pillow==10.3.0