wwieerrz Claude committed on
Commit
191a797
·
1 Parent(s): 4473137

🔥 Add REAL AI Models - Depth-Anything V2!

Browse files

MAJOR UPDATE:
- Add real AI depth estimation using Depth-Anything V2
- Auto-download 97MB SMALL model from Hugging Face
- Graceful fallback to Demo Mode if models fail
- Update all 3 tabs to use real AI

NEW FEATURES:
- Real AI depth estimation in all functions
- Auto model download on first run
- Smart error handling with fallback
- Updated README with real AI features

TECHNICAL:
- Add torch>=2.0.0 and transformers>=4.30.0
- Copy transformers_depth.py to HF Space
- Remove unused onnxruntime-gpu
- Update requirements.txt for real AI

READY FOR DEPLOYMENT!

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (4) hide show
  1. README.md +3 -2
  2. app.py +30 -11
  3. backend/utils/transformers_depth.py +153 -0
  4. requirements.txt +5 -2
README.md CHANGED
@@ -23,10 +23,11 @@ Transform 2D images into stunning 3D depth visualizations with state-of-the-art
23
  ## ✨ Features
24
 
25
  ### 🎯 Advanced Depth Estimation
26
- - **Fast Preview Mode** - Real-time depth estimation (~50-100ms)
 
27
  - **High Quality Mode** - Production-grade accuracy (~500-1500ms)
28
  - **Multiple Colormaps** - Inferno, Viridis, Plasma, Turbo, Magma, Hot, Ocean, Rainbow
29
- - **Demo Mode** - Works instantly without downloading models!
30
 
31
  ### 🎬 Visualization Options
32
  - **Colored Depth Maps** - Beautiful visualization with customizable color schemes
 
23
  ## ✨ Features
24
 
25
  ### 🎯 Advanced Depth Estimation
26
+ - **REAL AI Models** - Depth-Anything V2 from Hugging Face Transformers! 🔥
27
+ - **Fast Preview Mode** - Real-time depth estimation (~100-500ms)
28
  - **High Quality Mode** - Production-grade accuracy (~500-1500ms)
29
  - **Multiple Colormaps** - Inferno, Viridis, Plasma, Turbo, Magma, Hot, Ocean, Rainbow
30
+ - **Auto-Fallback** - Gracefully falls back to Demo Mode if models fail to load
31
 
32
  ### 🎬 Visualization Options
33
  - **Colored Depth Maps** - Beautiful visualization with customizable color schemes
app.py CHANGED
@@ -16,27 +16,41 @@ import sys
16
  sys.path.append(str(Path(__file__).parent / "backend"))
17
 
18
  # Import backend utilities
19
- from backend.utils.demo_depth import generate_smart_depth
20
  from backend.utils.image_processing import (
21
  depth_to_colormap,
22
  create_side_by_side
23
  )
24
 
25
- print("[*] DimensioDepth starting in DEMO MODE")
26
- print("[*] Using synthetic depth estimation (no model downloads needed)")
 
 
 
 
 
 
 
 
 
 
27
 
28
 
29
  def estimate_depth(image, quality_mode="Fast (Preview)", colormap_style="Inferno"):
30
  """
31
- Estimate depth from an input image using DEMO MODE
32
  """
33
  try:
34
  # Convert PIL to numpy if needed
35
  if isinstance(image, Image.Image):
36
  image = np.array(image)
37
 
38
- # Generate depth map using DEMO MODE
39
- depth = generate_smart_depth(image)
 
 
 
 
 
40
 
41
  # Convert colormap style to cv2 constant
42
  colormap_dict = {
@@ -61,13 +75,12 @@ def estimate_depth(image, quality_mode="Fast (Preview)", colormap_style="Inferno
61
  info = f"""
62
  ### βœ… Depth Estimation Complete!
63
 
64
- **Mode**: DEMO MODE (Synthetic Depth)
65
  **Input Size**: {image.shape[1]}x{image.shape[0]}
66
  **Output Size**: {depth.shape[1]}x{depth.shape[0]}
67
  **Colormap**: {colormap_style}
68
- **Processing**: Ultra-fast (<50ms)
69
 
70
- The DEMO MODE uses advanced edge detection + intensity analysis to create surprisingly good depth maps!
71
  """
72
 
73
  return depth_colored, depth_gray, info
@@ -85,7 +98,10 @@ def create_side_by_side_comparison(image, quality_mode="Fast (Preview)", colorma
85
  image = np.array(image)
86
 
87
  # Get depth estimation
88
- depth = generate_smart_depth(image)
 
 
 
89
 
90
  # Convert colormap
91
  colormap_dict = {
@@ -120,7 +136,10 @@ def create_3d_visualization(image, depth_map, parallax_strength=0.5):
120
 
121
  if depth_map is None:
122
  # Generate depth if not provided
123
- depth_map = generate_smart_depth(image)
 
 
 
124
  depth_map = (depth_map * 255).astype(np.uint8)
125
  elif isinstance(depth_map, Image.Image):
126
  depth_map = np.array(depth_map)
 
16
  sys.path.append(str(Path(__file__).parent / "backend"))
17
 
18
  # Import backend utilities
 
19
  from backend.utils.image_processing import (
20
  depth_to_colormap,
21
  create_side_by_side
22
  )
23
 
24
+ # Try to import REAL AI model
25
+ try:
26
+ from backend.utils.transformers_depth import TransformersDepthEstimator
27
+ print("[*] Loading REAL AI Depth-Anything V2 model...")
28
+ depth_estimator = TransformersDepthEstimator(model_size="small")
29
+ print("[+] REAL AI MODE ACTIVE!")
30
+ USE_REAL_AI = True
31
+ except Exception as e:
32
+ print(f"[!] Could not load AI models: {e}")
33
+ print("[*] Falling back to DEMO MODE")
34
+ from backend.utils.demo_depth import generate_smart_depth
35
+ USE_REAL_AI = False
36
 
37
 
38
  def estimate_depth(image, quality_mode="Fast (Preview)", colormap_style="Inferno"):
39
  """
40
+ Estimate depth from an input image using REAL AI or DEMO MODE
41
  """
42
  try:
43
  # Convert PIL to numpy if needed
44
  if isinstance(image, Image.Image):
45
  image = np.array(image)
46
 
47
+ # Generate depth map
48
+ if USE_REAL_AI:
49
+ depth = depth_estimator.predict(image)
50
+ mode_text = "REAL AI (Depth-Anything V2)"
51
+ else:
52
+ depth = generate_smart_depth(image)
53
+ mode_text = "DEMO MODE (Synthetic)"
54
 
55
  # Convert colormap style to cv2 constant
56
  colormap_dict = {
 
75
  info = f"""
76
  ### βœ… Depth Estimation Complete!
77
 
78
+ **Mode**: {mode_text}
79
  **Input Size**: {image.shape[1]}x{image.shape[0]}
80
  **Output Size**: {depth.shape[1]}x{depth.shape[0]}
81
  **Colormap**: {colormap_style}
 
82
 
83
+ {f"**Powered by**: Depth-Anything V2 SMALL (97MB)" if USE_REAL_AI else "**Processing**: Ultra-fast (<50ms) synthetic depth"}
84
  """
85
 
86
  return depth_colored, depth_gray, info
 
98
  image = np.array(image)
99
 
100
  # Get depth estimation
101
+ if USE_REAL_AI:
102
+ depth = depth_estimator.predict(image)
103
+ else:
104
+ depth = generate_smart_depth(image)
105
 
106
  # Convert colormap
107
  colormap_dict = {
 
136
 
137
  if depth_map is None:
138
  # Generate depth if not provided
139
+ if USE_REAL_AI:
140
+ depth_map = depth_estimator.predict(image)
141
+ else:
142
+ depth_map = generate_smart_depth(image)
143
  depth_map = (depth_map * 255).astype(np.uint8)
144
  elif isinstance(depth_map, Image.Image):
145
  depth_map = np.array(depth_map)
backend/utils/transformers_depth.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Real AI Depth Estimation using Hugging Face Transformers
3
+ Uses Depth-Anything V2 directly (no ONNX conversion needed!)
4
+ """
5
+
6
+ import numpy as np
7
+ import torch
8
+ from PIL import Image
9
+ from transformers import AutoImageProcessor, AutoModelForDepthEstimation
10
+
11
class TransformersDepthEstimator:
    """
    Depth estimation using Hugging Face Transformers.

    Loads a Depth-Anything V2 checkpoint through ``AutoImageProcessor`` and
    ``AutoModelForDepthEstimation`` and exposes a single ``predict`` method
    that returns a depth map normalized to [0, 1].
    """

    # Supported model sizes and the Hugging Face repositories they map to.
    _MODEL_REPOS = {
        "small": "depth-anything/Depth-Anything-V2-Small-hf",
        "base": "depth-anything/Depth-Anything-V2-Base-hf",
        "large": "depth-anything/Depth-Anything-V2-Large-hf",
    }

    def __init__(self, model_size="small", device=None, cache_dir=None):
        """
        Initialize depth estimator

        Args:
            model_size: "small", "base", or "large"
            device: "cuda", "cpu", or None (auto-detect)
            cache_dir: Where to cache models (default: project folder)

        Raises:
            ValueError: if ``model_size`` is not one of the supported sizes.
        """
        # Validate first so a bad argument fails fast, before any directory
        # is created or any download is attempted.
        if model_size not in self._MODEL_REPOS:
            raise ValueError(
                f"Invalid model_size. Choose from: {list(self._MODEL_REPOS.keys())}"
            )

        self.model_size = model_size

        # Auto-detect device if not specified
        if device is None:
            self.device = "cuda" if torch.cuda.is_available() else "cpu"
        else:
            self.device = device

        # Set cache directory to project folder
        if cache_dir is None:
            from pathlib import Path
            cache_dir = Path(__file__).parent.parent / "models" / "cache" / "huggingface"
            cache_dir.mkdir(parents=True, exist_ok=True)
            cache_dir = str(cache_dir)

        print(f"[*] Loading Depth-Anything V2 {model_size.upper()} model...")
        print(f"[*] Device: {self.device.upper()}")
        print(f"[*] Cache dir: {cache_dir}")

        repo_id = self._MODEL_REPOS[model_size]

        # Load processor and model with custom cache directory
        self.processor = AutoImageProcessor.from_pretrained(
            repo_id,
            cache_dir=cache_dir
        )
        self.model = AutoModelForDepthEstimation.from_pretrained(
            repo_id,
            cache_dir=cache_dir
        )

        # Move model to device and switch to inference mode
        self.model.to(self.device)
        self.model.eval()

        print(f"[+] Model loaded successfully!")
        print(f"[+] Cached in: {cache_dir}")

    @staticmethod
    def _normalize(depth):
        """
        Rescale a depth array to [0, 1].

        A constant (or non-finite-range) map has no usable range; it is
        returned as all zeros instead of dividing by zero and yielding NaN.
        """
        depth = np.asarray(depth)
        lowest = depth.min()
        span = depth.max() - lowest
        if not np.isfinite(span) or span <= 0:
            return np.zeros_like(depth)
        return (depth - lowest) / span

    def predict(self, image):
        """
        Predict depth map for an image

        Args:
            image: numpy array (H, W, 3) in RGB format, or a PIL image

        Returns:
            depth: numpy array (H, W) with depth values [0, 1]
        """
        # Convert numpy to PIL if needed
        if isinstance(image, np.ndarray):
            image_pil = Image.fromarray(image)
        else:
            image_pil = image

        # The processor is fed RGB; convert other PIL modes (RGBA, L, ...)
        # up front rather than relying on the processor to cope with them.
        if getattr(image_pil, "mode", "RGB") != "RGB":
            image_pil = image_pil.convert("RGB")

        # Prepare image
        inputs = self.processor(images=image_pil, return_tensors="pt")

        # Move inputs to device
        inputs = {k: v.to(self.device) for k, v in inputs.items()}

        # Inference
        with torch.no_grad():
            outputs = self.model(**inputs)
            predicted_depth = outputs.predicted_depth

            # Interpolate to original size; PIL reports (W, H), torch wants (H, W)
            prediction = torch.nn.functional.interpolate(
                predicted_depth.unsqueeze(1),
                size=image_pil.size[::-1],
                mode="bicubic",
                align_corners=False,
            )

        # Index the single batch/channel entry instead of squeeze(): squeeze()
        # would also drop H or W when either equals 1.
        depth = prediction[0, 0].cpu().numpy()

        # Normalize to [0, 1]
        return self._normalize(depth)
116
+
117
+
118
+ # Test function
119
if __name__ == "__main__":
    # Manual smoke test: load the small model, run it once on random noise,
    # and report timing plus basic properties of the returned depth map.
    import time

    print("=" * 70)
    print(" Testing Depth-Anything V2 with Transformers")
    print("=" * 70)

    # Create estimator
    estimator = TransformersDepthEstimator(model_size="small")

    # Create test image (randint's upper bound is exclusive, so 256 is
    # needed for the full 0-255 uint8 range)
    print("[*] Creating test image...")
    test_image = np.random.randint(0, 256, (518, 518, 3), dtype=np.uint8)

    # Predict depth, timed with a monotonic clock
    print("[*] Running depth estimation...")
    start = time.perf_counter()
    depth = estimator.predict(test_image)
    elapsed = (time.perf_counter() - start) * 1000

    print(f"[+] Depth estimation complete!")
    print(f"[+] Processing time: {elapsed:.2f}ms")
    print(f"[+] Output shape: {depth.shape}")
    print(f"[+] Depth range: [{depth.min():.3f}, {depth.max():.3f}]")

    print("\n" + "=" * 70)
    print(" SUCCESS! Real AI Depth Estimation Working!")
    print("=" * 70)
    print("\nYou can now use real AI depth estimation!")
    print("\nTo use in your app:")
    print(" from backend.utils.transformers_depth import TransformersDepthEstimator")
    print(" estimator = TransformersDepthEstimator('small')")
    print(" depth = estimator.predict(image)")
    print("=" * 70)
requirements.txt CHANGED
@@ -1,13 +1,16 @@
1
  # Gradio and UI
2
  gradio==4.44.1
3
 
 
 
 
 
4
  # Core ML and image processing
5
- onnxruntime-gpu==1.20.1
6
  opencv-python==4.10.0.84
7
  Pillow>=8.0,<11.0
8
  numpy==1.26.4
9
 
10
- # Optional: For downloading models from HuggingFace
11
  huggingface-hub==0.27.0
12
 
13
  # Utilities
 
1
  # Gradio and UI
2
  gradio==4.44.1
3
 
4
+ # Real AI Models - Depth-Anything V2
5
+ torch>=2.0.0
6
+ transformers>=4.30.0
7
+
8
  # Core ML and image processing
 
9
  opencv-python==4.10.0.84
10
  Pillow>=8.0,<11.0
11
  numpy==1.26.4
12
 
13
+ # For downloading models from HuggingFace
14
  huggingface-hub==0.27.0
15
 
16
  # Utilities