Spaces:

dimdimz
/

DimensioDepth

Sleeping

wwieerrz Claude commited on Nov 2

Commit

191a797

1 Parent(s): 4473137

🔥 Add REAL AI Models - Depth-Anything V2!

MAJOR UPDATE:
- Add real AI depth estimation using Depth-Anything V2
- Auto-download 97MB SMALL model from Hugging Face
- Graceful fallback to Demo Mode if models fail
- Update all 3 tabs to use real AI

NEW FEATURES:
- Real AI depth estimation in all functions
- Auto model download on first run
- Smart error handling with fallback
- Updated README with real AI features

TECHNICAL:
- Add torch>=2.0.0 and transformers>=4.30.0
- Copy transformers_depth.py to HF Space
- Remove unused onnxruntime-gpu
- Update requirements.txt for real AI

READY FOR DEPLOYMENT!

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (4) hide show

README.md +3 -2
app.py +30 -11
backend/utils/transformers_depth.py +153 -0
requirements.txt +5 -2

README.md CHANGED Viewed

@@ -23,10 +23,11 @@ Transform 2D images into stunning 3D depth visualizations with state-of-the-art
 ## ✨ Features
 ### 🎯 Advanced Depth Estimation
-- **Fast Preview Mode** - Real-time depth estimation (~50-100ms)
 - **High Quality Mode** - Production-grade accuracy (~500-1500ms)
 - **Multiple Colormaps** - Inferno, Viridis, Plasma, Turbo, Magma, Hot, Ocean, Rainbow
-- **Demo Mode** - Works instantly without downloading models!
 ### 🎬 Visualization Options
 - **Colored Depth Maps** - Beautiful visualization with customizable color schemes

 ## ✨ Features
 ### 🎯 Advanced Depth Estimation
+- **REAL AI Models** - Depth-Anything V2 from Hugging Face Transformers! 🔥
+- **Fast Preview Mode** - Real-time depth estimation (~100-500ms)
 - **High Quality Mode** - Production-grade accuracy (~500-1500ms)
 - **Multiple Colormaps** - Inferno, Viridis, Plasma, Turbo, Magma, Hot, Ocean, Rainbow
+- **Auto-Fallback** - Gracefully falls back to Demo Mode if models fail to load
 ### 🎬 Visualization Options
 - **Colored Depth Maps** - Beautiful visualization with customizable color schemes

app.py CHANGED Viewed

@@ -16,27 +16,41 @@ import sys
 sys.path.append(str(Path(__file__).parent / "backend"))
 # Import backend utilities
-from backend.utils.demo_depth import generate_smart_depth
 from backend.utils.image_processing import (
     depth_to_colormap,
     create_side_by_side
 )
-print("[*] DimensioDepth starting in DEMO MODE")
-print("[*] Using synthetic depth estimation (no model downloads needed)")
 def estimate_depth(image, quality_mode="Fast (Preview)", colormap_style="Inferno"):
     """
-    Estimate depth from an input image using DEMO MODE
     """
     try:
         # Convert PIL to numpy if needed
         if isinstance(image, Image.Image):
             image = np.array(image)
-        # Generate depth map using DEMO MODE
-        depth = generate_smart_depth(image)
         # Convert colormap style to cv2 constant
         colormap_dict = {
@@ -61,13 +75,12 @@ def estimate_depth(image, quality_mode="Fast (Preview)", colormap_style="Inferno
         info = f"""
 ### ✅ Depth Estimation Complete!
-**Mode**: DEMO MODE (Synthetic Depth)
 **Input Size**: {image.shape[1]}x{image.shape[0]}
 **Output Size**: {depth.shape[1]}x{depth.shape[0]}
 **Colormap**: {colormap_style}
-**Processing**: Ultra-fast (<50ms)
-The DEMO MODE uses advanced edge detection + intensity analysis to create surprisingly good depth maps!
 """
         return depth_colored, depth_gray, info
@@ -85,7 +98,10 @@ def create_side_by_side_comparison(image, quality_mode="Fast (Preview)", colorma
             image = np.array(image)
         # Get depth estimation
-        depth = generate_smart_depth(image)
         # Convert colormap
         colormap_dict = {
@@ -120,7 +136,10 @@ def create_3d_visualization(image, depth_map, parallax_strength=0.5):
         if depth_map is None:
             # Generate depth if not provided
-            depth_map = generate_smart_depth(image)
             depth_map = (depth_map * 255).astype(np.uint8)
         elif isinstance(depth_map, Image.Image):
             depth_map = np.array(depth_map)

 sys.path.append(str(Path(__file__).parent / "backend"))
 # Import backend utilities
 from backend.utils.image_processing import (
     depth_to_colormap,
     create_side_by_side
 )
+# Try to import REAL AI model
+try:
+    from backend.utils.transformers_depth import TransformersDepthEstimator
+    print("[*] Loading REAL AI Depth-Anything V2 model...")
+    depth_estimator = TransformersDepthEstimator(model_size="small")
+    print("[+] REAL AI MODE ACTIVE!")
+    USE_REAL_AI = True
+except Exception as e:
+    print(f"[!] Could not load AI models: {e}")
+    print("[*] Falling back to DEMO MODE")
+    from backend.utils.demo_depth import generate_smart_depth
+    USE_REAL_AI = False
 def estimate_depth(image, quality_mode="Fast (Preview)", colormap_style="Inferno"):
     """
+    Estimate depth from an input image using REAL AI or DEMO MODE
     """
     try:
         # Convert PIL to numpy if needed
         if isinstance(image, Image.Image):
             image = np.array(image)
+        # Generate depth map
+        if USE_REAL_AI:
+            depth = depth_estimator.predict(image)
+            mode_text = "REAL AI (Depth-Anything V2)"
+        else:
+            depth = generate_smart_depth(image)
+            mode_text = "DEMO MODE (Synthetic)"
         # Convert colormap style to cv2 constant
         colormap_dict = {
         info = f"""
 ### ✅ Depth Estimation Complete!
+**Mode**: {mode_text}
 **Input Size**: {image.shape[1]}x{image.shape[0]}
 **Output Size**: {depth.shape[1]}x{depth.shape[0]}
 **Colormap**: {colormap_style}
+{f"**Powered by**: Depth-Anything V2 SMALL (97MB)" if USE_REAL_AI else "**Processing**: Ultra-fast (<50ms) synthetic depth"}
 """
         return depth_colored, depth_gray, info
             image = np.array(image)
         # Get depth estimation
+        if USE_REAL_AI:
+            depth = depth_estimator.predict(image)
+        else:
+            depth = generate_smart_depth(image)
         # Convert colormap
         colormap_dict = {
         if depth_map is None:
             # Generate depth if not provided
+            if USE_REAL_AI:
+                depth_map = depth_estimator.predict(image)
+            else:
+                depth_map = generate_smart_depth(image)
             depth_map = (depth_map * 255).astype(np.uint8)
         elif isinstance(depth_map, Image.Image):
             depth_map = np.array(depth_map)

backend/utils/transformers_depth.py ADDED Viewed

	@@ -0,0 +1,153 @@

+"""
+Real AI Depth Estimation using Hugging Face Transformers
+Uses Depth-Anything V2 directly (no ONNX conversion needed!)
+"""
+import numpy as np
+import torch
+from PIL import Image
+from transformers import AutoImageProcessor, AutoModelForDepthEstimation
+class TransformersDepthEstimator:
+    """
+    Depth estimation using Hugging Face Transformers
+    Easier than ONNX - works directly with PyTorch models!
+    """
+    def __init__(self, model_size="small", device=None, cache_dir=None):
+        """
+        Initialize depth estimator
+        Args:
+            model_size: "small", "base", or "large"
+            device: "cuda", "cpu", or None (auto-detect)
+            cache_dir: Where to cache models (default: project folder)
+        """
+        self.model_size = model_size
+        # Auto-detect device if not specified
+        if device is None:
+            self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        else:
+            self.device = device
+        # Set cache directory to project folder
+        if cache_dir is None:
+            from pathlib import Path
+            cache_dir = Path(__file__).parent.parent / "models" / "cache" / "huggingface"
+            cache_dir.mkdir(parents=True, exist_ok=True)
+            cache_dir = str(cache_dir)
+        print(f"[*] Loading Depth-Anything V2 {model_size.upper()} model...")
+        print(f"[*] Device: {self.device.upper()}")
+        print(f"[*] Cache dir: {cache_dir}")
+        # Model repository mapping
+        model_map = {
+            "small": "depth-anything/Depth-Anything-V2-Small-hf",
+            "base": "depth-anything/Depth-Anything-V2-Base-hf",
+            "large": "depth-anything/Depth-Anything-V2-Large-hf"
+        }
+        if model_size not in model_map:
+            raise ValueError(f"Invalid model_size. Choose from: {list(model_map.keys())}")
+        repo_id = model_map[model_size]
+        # Load processor and model with custom cache directory
+        self.processor = AutoImageProcessor.from_pretrained(
+            repo_id,
+            cache_dir=cache_dir
+        )
+        self.model = AutoModelForDepthEstimation.from_pretrained(
+            repo_id,
+            cache_dir=cache_dir
+        )
+        # Move model to device
+        self.model.to(self.device)
+        self.model.eval()
+        print(f"[+] Model loaded successfully!")
+        print(f"[+] Cached in: {cache_dir}")
+    def predict(self, image):
+        """
+        Predict depth map for an image
+        Args:
+            image: numpy array (H, W, 3) in RGB format
+        Returns:
+            depth: numpy array (H, W) with depth values [0, 1]
+        """
+        # Convert numpy to PIL if needed
+        if isinstance(image, np.ndarray):
+            image_pil = Image.fromarray(image)
+        else:
+            image_pil = image
+        # Prepare image
+        inputs = self.processor(images=image_pil, return_tensors="pt")
+        # Move inputs to device
+        inputs = {k: v.to(self.device) for k, v in inputs.items()}
+        # Inference
+        with torch.no_grad():
+            outputs = self.model(**inputs)
+            predicted_depth = outputs.predicted_depth
+        # Interpolate to original size
+        prediction = torch.nn.functional.interpolate(
+            predicted_depth.unsqueeze(1),
+            size=image_pil.size[::-1],
+            mode="bicubic",
+            align_corners=False,
+        )
+        # Convert to numpy and normalize
+        depth = prediction.squeeze().cpu().numpy()
+        # Normalize to [0, 1]
+        depth = (depth - depth.min()) / (depth.max() - depth.min())
+        return depth
+# Test function
+if __name__ == "__main__":
+    import cv2
+    print("=" * 70)
+    print("  Testing Depth-Anything V2 with Transformers")
+    print("=" * 70)
+    # Create estimator
+    estimator = TransformersDepthEstimator(model_size="small")
+    # Create test image
+    print("[*] Creating test image...")
+    test_image = np.random.randint(0, 255, (518, 518, 3), dtype=np.uint8)
+    # Predict depth
+    print("[*] Running depth estimation...")
+    import time
+    start = time.time()
+    depth = estimator.predict(test_image)
+    elapsed = (time.time() - start) * 1000
+    print(f"[+] Depth estimation complete!")
+    print(f"[+] Processing time: {elapsed:.2f}ms")
+    print(f"[+] Output shape: {depth.shape}")
+    print(f"[+] Depth range: [{depth.min():.3f}, {depth.max():.3f}]")
+    print("\n" + "=" * 70)
+    print("  SUCCESS! Real AI Depth Estimation Working!")
+    print("=" * 70)
+    print("\nYou can now use real AI depth estimation!")
+    print("\nTo use in your app:")
+    print("  from backend.utils.transformers_depth import TransformersDepthEstimator")
+    print("  estimator = TransformersDepthEstimator('small')")
+    print("  depth = estimator.predict(image)")
+    print("=" * 70)

requirements.txt CHANGED Viewed

@@ -1,13 +1,16 @@
 # Gradio and UI
 gradio==4.44.1
 # Core ML and image processing
-onnxruntime-gpu==1.20.1
 opencv-python==4.10.0.84
 Pillow>=8.0,<11.0
 numpy==1.26.4
-# Optional: For downloading models from HuggingFace
 huggingface-hub==0.27.0
 # Utilities

 # Gradio and UI
 gradio==4.44.1
+# Real AI Models - Depth-Anything V2
+torch>=2.0.0
+transformers>=4.30.0
 # Core ML and image processing
 opencv-python==4.10.0.84
 Pillow>=8.0,<11.0
 numpy==1.26.4
+# For downloading models from HuggingFace
 huggingface-hub==0.27.0
 # Utilities