shriarul5273 committed on
Commit
6cd978e
·
1 Parent(s): e0f1d2e

add app.py file and requirements.txt

Files changed (1)
  1. app.py +437 -0
app.py ADDED
@@ -0,0 +1,437 @@
+ """
+ Depth Anything Comparison Demo (v1 vs v2) - ZeroGPU Version
+
+ Compare different Depth Anything models (v1 and v2) side by side or with a slider using Gradio.
+ Optimized for Hugging Face Spaces with ZeroGPU support.
+ """
+
+ import os
+ import sys
+ import logging
+ import gc
+ import tempfile
+ from pathlib import Path
+ from typing import Optional, Tuple, Dict, List
+ import numpy as np
+ import cv2
+ import gradio as gr
+ from PIL import Image
+ from huggingface_hub import hf_hub_download
+ import spaces
+
+ # Import v1 and v2 model code
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), "Depth-Anything"))
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), "Depth-Anything-V2"))
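+ # Assumes both upstream repos are vendored as subdirectories next to this file
+ # (e.g. git clones of Depth-Anything and Depth-Anything-V2) so their packages import.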
+
+ # v1 imports
+ from depth_anything.dpt import DepthAnything as DepthAnythingV1
+ from depth_anything.util.transform import Resize, NormalizeImage, PrepareForNet
+ import torch
+ import torch.nn.functional as F
+ from torchvision.transforms import Compose
+
+ # v2 imports
+ from depth_anything_v2.dpt import DepthAnythingV2
+
+ import matplotlib
+
+ # Logging
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+
+ # Device selection - ZeroGPU will handle GPU allocation
+ DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
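+ # Note: on ZeroGPU Spaces the GPU is only attached while a @spaces.GPU-decorated
+ # function runs; this module-level check is assumed to report 'cuda' because
+ # importing spaces patches torch's CUDA initialization.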
+
+ # Model configs
+ V1_MODEL_CONFIGS = {
+     "vits14": {
+         "model_name": "LiheYoung/depth_anything_vits14",
+         "display_name": "Depth Anything v1 ViT-S (Small, Fastest)"
+     },
+     "vitb14": {
+         "model_name": "LiheYoung/depth_anything_vitb14",
+         "display_name": "Depth Anything v1 ViT-B (Base, Balanced)"
+     },
+     "vitl14": {
+         "model_name": "LiheYoung/depth_anything_vitl14",
+         "display_name": "Depth Anything v1 ViT-L (Large, Best Quality)"
+     }
+ }
+
+ V2_MODEL_CONFIGS = {
+     'vits': {
+         'display_name': 'Depth Anything v2 ViT-Small',
+         'checkpoint': 'Depth-Anything-V2/checkpoints/depth_anything_v2_vits.pth',
+         'features': 64, 'out_channels': [48, 96, 192, 384]
+     },
+     'vitb': {
+         'display_name': 'Depth Anything v2 ViT-Base',
+         'checkpoint': 'Depth-Anything-V2/checkpoints/depth_anything_v2_vitb.pth',
+         'features': 128, 'out_channels': [96, 192, 384, 768]
+     },
+     'vitl': {
+         'display_name': 'Depth Anything v2 ViT-Large',
+         'checkpoint': 'Depth-Anything-V2/checkpoints/depth_anything_v2_vitl.pth',
+         'features': 256, 'out_channels': [256, 512, 1024, 1024]
+     }
+ }
+
+ # Model cache - cleared after each inference for ZeroGPU
+ _v1_models = {}
+ _v2_models = {}
+
+ # v1 transform
+ v1_transform = Compose([
+     Resize(width=518, height=518, resize_target=False, keep_aspect_ratio=True, ensure_multiple_of=14, resize_method='lower_bound', image_interpolation_method=cv2.INTER_CUBIC),
+     NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+     PrepareForNet(),
+ ])
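+ # Maps an HxWx3 float RGB image in [0, 1] to a normalized 3xH'xW' float32 array,
+ # with H' and W' rounded to multiples of 14 (the ViT-14 patch size).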
+
+ def load_v1_model(key: str):
+     """Load v1 model with memory management for ZeroGPU"""
+     if key in _v1_models:
+         return _v1_models[key]
+
+     # Clear cache to free memory
+     clear_model_cache()
+
+     model = DepthAnythingV1.from_pretrained(V1_MODEL_CONFIGS[key]["model_name"]).to(DEVICE).eval()
+     _v1_models[key] = model
+     return model
+
+ def load_v2_model(key: str):
+     """Load v2 model with memory management for ZeroGPU"""
+     if key in _v2_models:
+         return _v2_models[key]
+
+     # Clear cache to free memory
+     clear_model_cache()
+
+     config = V2_MODEL_CONFIGS[key]
+     model = DepthAnythingV2(encoder=key, features=config['features'], out_channels=config['out_channels'])
+
+     # Try to download from the HF Hub first, falling back to a local checkpoint
+     try:
+         # Map variant keys to the model names used on the HF Hub
+         model_name_mapping = {
+             'vits': 'Small',
+             'vitb': 'Base',
+             'vitl': 'Large'
+         }
+
+         model_name = model_name_mapping.get(key, 'Large')  # Default to Large
+         filename = f"depth_anything_v2_{key}.pth"
+
+         try:
+             filepath = hf_hub_download(
+                 repo_id=f"depth-anything/Depth-Anything-V2-{model_name}",
+                 filename=filename,
+                 repo_type="model"
+             )
+             logging.info(f"Downloaded V2 model from HF Hub: {filepath}")
+             checkpoint_path = filepath
+         except Exception as e:
+             logging.warning(f"Failed to download V2 model from HF Hub: {e}")
+             # Fall back to the local checkpoint
+             checkpoint_path = config['checkpoint']
+             if not os.path.exists(checkpoint_path):
+                 raise FileNotFoundError(f"Neither HF Hub download nor local checkpoint available: {checkpoint_path}")
+             logging.info(f"Using local V2 checkpoint: {checkpoint_path}")
+
+         state_dict = torch.load(checkpoint_path, map_location=DEVICE)
+     except Exception as e:
+         logging.error(f"Failed to load V2 model {key}: {e}")
+         raise
+
+     model.load_state_dict(state_dict)
+     model = model.to(DEVICE).eval()
+     _v2_models[key] = model
+     return model
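+ # For example, key='vitl' resolves to repo_id 'depth-anything/Depth-Anything-V2-Large'
+ # and filename 'depth_anything_v2_vitl.pth' on the Hub.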
+
+ def clear_model_cache():
+     """Clear model cache to free GPU memory for ZeroGPU"""
+     # Clearing the dicts drops the only references to the cached models;
+     # gc.collect() plus empty_cache() then release the GPU memory.
+     _v1_models.clear()
+     _v2_models.clear()
+     gc.collect()
+     if torch.cuda.is_available():
+         torch.cuda.empty_cache()
+
+ def predict_v1(model, image: np.ndarray) -> np.ndarray:
+     h, w = image.shape[:2]
+     image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) / 255.0
+     image = v1_transform({'image': image})['image']
+     image = torch.from_numpy(image).unsqueeze(0).to(DEVICE)
+     with torch.no_grad():
+         depth = model(image)
+     depth = F.interpolate(depth[None], (h, w), mode='bilinear', align_corners=False)[0, 0]
+     return depth.cpu().numpy()
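+ # Returns a relative, disparity-style depth map at the original HxW resolution
+ # (larger values are typically nearer), not metric depth.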
+
+ def predict_v2(model, image: np.ndarray) -> np.ndarray:
+     with torch.no_grad():
+         # Upstream infer_image expects a BGR (cv2-style) image and performs the
+         # BGR-to-RGB conversion itself, so the BGR frame is passed unchanged
+         # (flipping channels here would undo that conversion).
+         depth = model.infer_image(image)
+     return depth
+
+ def colorize_depth(depth: np.ndarray) -> np.ndarray:
+     depth_norm = (depth - depth.min()) / (depth.max() - depth.min() + 1e-8)
+     depth_uint8 = (depth_norm * 255).astype(np.uint8)
+     cmap = matplotlib.colormaps.get_cmap('Spectral_r')
+     colored = (cmap(depth_uint8)[:, :, :3] * 255).astype(np.uint8)
+     return colored
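+ # Calling the colormap with a uint8 array indexes its 256-entry lookup table
+ # directly and returns HxWx4 RGBA floats in [0, 1]; the alpha channel is dropped.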
+
+ def get_model_choices() -> List[Tuple[str, str]]:
+     choices = []
+     for k, v in V1_MODEL_CONFIGS.items():
+         choices.append((v['display_name'], f'v1_{k}'))
+     for k, v in V2_MODEL_CONFIGS.items():
+         choices.append((v['display_name'], f'v2_{k}'))
+     return choices
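+ # Produces (label, value) pairs for the Gradio dropdowns, e.g.
+ # ("Depth Anything v1 ViT-S (Small, Fastest)", "v1_vits14").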
+
+ @spaces.GPU
+ def run_model(model_key: str, image: np.ndarray) -> Tuple[np.ndarray, str]:
+     """Run model inference with ZeroGPU optimization"""
+     try:
+         if model_key.startswith('v1_'):
+             key = model_key[3:]
+             model = load_v1_model(key)
+             depth = predict_v1(model, image)
+             label = V1_MODEL_CONFIGS[key]['display_name']
+         else:
+             key = model_key[3:]
+             model = load_v2_model(key)
+             depth = predict_v2(model, image)
+             label = V2_MODEL_CONFIGS[key]['display_name']
+
+         colored = colorize_depth(depth)
+         return colored, label
+     finally:
+         # Clean up GPU memory after inference
+         if torch.cuda.is_available():
+             torch.cuda.empty_cache()
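+ # run_model is also called from the @spaces.GPU handlers below; the nested call
+ # is assumed to execute inside the already-allocated GPU context rather than
+ # requesting a second ZeroGPU allocation.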
+
+ @spaces.GPU
+ def compare_models(image, model1: str, model2: str, progress=gr.Progress()) -> Tuple[np.ndarray, str]:
+     """Compare two models with ZeroGPU optimization"""
+     if image is None:
+         return None, "❌ Please upload an image."
+
+     try:
+         # Convert the input to a BGR numpy array if needed
+         if isinstance(image, str):
+             # File path
+             image = cv2.imread(image)
+         elif hasattr(image, 'save'):
+             # PIL Image
+             image = np.array(image)
+             if len(image.shape) == 3 and image.shape[2] == 3:
+                 image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+         elif isinstance(image, np.ndarray):
+             # Gradio numpy images arrive as RGB; convert to BGR for the cv2-based pipeline
+             if image.ndim == 3 and image.shape[2] == 3:
+                 image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+
+         progress(0.1, desc=f"Running {model1}")
+         out1, label1 = run_model(model1, image)
+         progress(0.5, desc=f"Running {model2}")
+         out2, label2 = run_model(model2, image)
+
+         # Side-by-side canvas: 40 px label header, 10 px gutter between panels
+         h, w = out1.shape[:2]
+         canvas = np.ones((h + 40, w * 2 + 20, 3), dtype=np.uint8) * 255
+         canvas[40:40+h, 10:10+w] = out1
+         canvas[40:40+h, w+20:w*2+20] = out2
+
+         font = cv2.FONT_HERSHEY_SIMPLEX
+         font_scale = 0.7
+         thickness = 2
+         size1 = cv2.getTextSize(label1, font, font_scale, thickness)[0]
+         size2 = cv2.getTextSize(label2, font, font_scale, thickness)[0]
+         cv2.putText(canvas, label1, (10 + (w - size1[0]) // 2, 28), font, font_scale, (0, 0, 0), thickness)
+         cv2.putText(canvas, label2, (w + 20 + (w - size2[0]) // 2, 28), font, font_scale, (0, 0, 0), thickness)
+
+         progress(1.0, desc="Done")
+         return canvas, f"**{label1}** vs **{label2}**"
+
+     finally:
+         # Clean up GPU memory after inference
+         clear_model_cache()
+
+ @spaces.GPU
+ def slider_compare(image, model1: str, model2: str, progress=gr.Progress()):
+     """Slider comparison with ZeroGPU optimization"""
+     if image is None:
+         return None, "❌ Please upload an image."
+
+     try:
+         # Convert the input to a BGR numpy array if needed
+         if isinstance(image, str):
+             # File path
+             image = cv2.imread(image)
+         elif hasattr(image, 'save'):
+             # PIL Image
+             image = np.array(image)
+             if len(image.shape) == 3 and image.shape[2] == 3:
+                 image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+         elif isinstance(image, np.ndarray):
+             # Gradio numpy images arrive as RGB; convert to BGR for the cv2-based pipeline
+             if image.ndim == 3 and image.shape[2] == 3:
+                 image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+
+         progress(0.1, desc=f"Running {model1}")
+         out1, label1 = run_model(model1, image)
+         progress(0.5, desc=f"Running {model2}")
+         out2, label2 = run_model(model2, image)
+
+         def add_label(img, label):
+             h, w = img.shape[:2]
+             canvas = np.ones((h + 40, w, 3), dtype=np.uint8) * 255
+             canvas[40:, :] = img
+             font = cv2.FONT_HERSHEY_SIMPLEX
+             font_scale = 0.7
+             thickness = 2
+             size = cv2.getTextSize(label, font, font_scale, thickness)[0]
+             cv2.putText(canvas, label, ((w - size[0]) // 2, 28), font, font_scale, (0, 0, 0), thickness)
+             return canvas
+
+         return (add_label(out1, label1), add_label(out2, label2)), f"Slider: **{label1}** vs **{label2}**"
+
+     finally:
+         # Clean up GPU memory after inference
+         clear_model_cache()
+
+ @spaces.GPU
+ def single_inference(image, model: str, progress=gr.Progress()):
+     """Single model inference with ZeroGPU optimization"""
+     if image is None:
+         return None, "❌ Please upload an image."
+
+     try:
+         # Convert the input to a BGR numpy array if needed
+         if isinstance(image, str):
+             # File path
+             image = cv2.imread(image)
+         elif hasattr(image, 'save'):
+             # PIL Image
+             image = np.array(image)
+             if len(image.shape) == 3 and image.shape[2] == 3:
+                 image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+         elif isinstance(image, np.ndarray):
+             # Gradio numpy images arrive as RGB; convert to BGR for the cv2-based pipeline
+             if image.ndim == 3 and image.shape[2] == 3:
+                 image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+
+         progress(0.1, desc=f"Running {model}")
+         out, label = run_model(model, image)
+         progress(1.0, desc="Done")
+         return out, f"**{label}**"
+
+     finally:
+         # Clean up GPU memory after inference
+         clear_model_cache()
+
+ def get_example_images() -> List[str]:
+     import re
+
+     def natural_sort_key(filename):
+         """Sort filenames with numbers naturally (demo1, demo2, ..., demo10, demo11)"""
+         # Split by numbers and convert numeric parts to integers for proper sorting
+         return [int(part) if part.isdigit() else part for part in re.split(r'(\d+)', filename)]
+
+     # Try both v1 and v2 examples
+     examples = []
+     for ex_dir in ["assets/examples", "Depth-Anything/assets/examples", "Depth-Anything-V2/assets/examples"]:
+         ex_path = os.path.join(os.path.dirname(__file__), ex_dir)
+         if os.path.exists(ex_path):
+             # Get all image files and sort them naturally
+             all_files = [f for f in os.listdir(ex_path) if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
+             sorted_files = sorted(all_files, key=natural_sort_key)
+             files = [os.path.join(ex_path, f) for f in sorted_files]
+             examples.extend(files)
+     return examples
+
+ def get_paginated_examples(examples: List[str], page: int = 0, per_page: int = 6) -> Tuple[List[str], int, bool, bool]:
+     """Get paginated examples with navigation info"""
+     total_pages = (len(examples) - 1) // per_page + 1 if examples else 0
+     start_idx = page * per_page
+     end_idx = min(start_idx + per_page, len(examples))
+
+     current_examples = examples[start_idx:end_idx]
+     has_prev = page > 0
+     has_next = page < total_pages - 1
+
+     return current_examples, total_pages, has_prev, has_next
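+ # Not yet wired into the UI below; intended usage (hypothetical):
+ #   imgs, total, has_prev, has_next = get_paginated_examples(get_example_images(), page=0)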
+
+ def create_app():
+     model_choices = get_model_choices()
+     default1 = model_choices[0][1]
+     default2 = model_choices[1][1]
+
+     with gr.Blocks(title="Depth Anything v1 vs v2 Comparison", theme=gr.themes.Soft()) as app:
+         gr.Markdown("""
+         # Depth Anything v1 vs v2 Comparison
+         Compare different Depth Anything models (v1 and v2) side by side or with a slider.
+
+         ⚡ **Running on ZeroGPU** - GPU resources are allocated automatically for inference.
+         """)
+
+         with gr.Tabs():
+             with gr.Tab("🎚️ Slider Comparison"):
+                 with gr.Row():
+                     img_input2 = gr.Image(label="Input Image")
+                     with gr.Column():
+                         m1s = gr.Dropdown(choices=model_choices, label="Model A", value=default1)
+                         m2s = gr.Dropdown(choices=model_choices, label="Model B", value=default2)
+                         btn2 = gr.Button("Slider Compare", variant="primary")
+                 slider = gr.ImageSlider(label="Model Comparison Slider")
+                 slider_status = gr.Markdown()
+                 btn2.click(slider_compare, inputs=[img_input2, m1s, m2s], outputs=[slider, slider_status], show_progress=True)
+
+                 # Examples for slider comparison
+                 ex_imgs = get_example_images()
+                 if ex_imgs:
+                     def slider_example_fn(image):
+                         return slider_compare(image, default1, default2)
+                     examples2 = gr.Examples(examples=ex_imgs, inputs=[img_input2], outputs=[slider, slider_status], fn=slider_example_fn)
+
+             with gr.Tab("🔍 Method Comparison"):
+                 with gr.Row():
+                     img_input = gr.Image(label="Input Image")
+                     with gr.Column():
+                         m1 = gr.Dropdown(choices=model_choices, label="Model 1", value=default1)
+                         m2 = gr.Dropdown(choices=model_choices, label="Model 2", value=default2)
+                         btn = gr.Button("Compare", variant="primary")
+                 out_img = gr.Image(label="Comparison Result")
+                 out_status = gr.Markdown()
+                 btn.click(compare_models, inputs=[img_input, m1, m2], outputs=[out_img, out_status], show_progress=True)
+
+                 # Examples for method comparison
+                 if ex_imgs:
+                     def compare_example_fn(image):
+                         return compare_models(image, default1, default2)
+                     examples = gr.Examples(examples=ex_imgs, inputs=[img_input], outputs=[out_img, out_status], fn=compare_example_fn)
+
+             with gr.Tab("🔬 Single Model"):
+                 with gr.Row():
+                     img_input3 = gr.Image(label="Input Image")
+                     with gr.Column():
+                         m_single = gr.Dropdown(choices=model_choices, label="Model", value=default1)
+                         btn3 = gr.Button("Run", variant="primary")
+                 out_single = gr.Image(label="Depth Result")
+                 out_single_status = gr.Markdown()
+                 btn3.click(single_inference, inputs=[img_input3, m_single], outputs=[out_single, out_single_status], show_progress=True)
+
+                 # Examples for single model
+                 if ex_imgs:
+                     def single_example_fn(image):
+                         return single_inference(image, default1)
+                     examples3 = gr.Examples(examples=ex_imgs, inputs=[img_input3], outputs=[out_single, out_single_status], fn=single_example_fn)
+
+         gr.Markdown("""
+         ---
+         **References:**
+         - **v1**: [Depth Anything v1](https://github.com/LiheYoung/Depth-Anything)
+         - **v2**: [Depth Anything v2](https://github.com/DepthAnything/Depth-Anything-V2)
+
+         **Note**: This app uses ZeroGPU for efficient GPU resource management. Models are loaded on demand and GPU memory is automatically cleaned up after each inference.
+         """)
+
+     return app
+
+ def main():
+     logging.info("🚀 Starting Depth Anything Comparison App on ZeroGPU...")
+     app = create_app()
+     app.queue().launch(show_error=True)
+
+ if __name__ == "__main__":
+     main()
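The commit message also mentions requirements.txt, but this diff only adds app.py. A plausible requirements.txt inferred from the imports above (package names only; any version pins are assumptions):

gradio
spaces
torch
torchvision
opencv-python
numpy
Pillow
matplotlib
huggingface_hub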