File size: 4,824 Bytes
191a797
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
"""
Real AI Depth Estimation using Hugging Face Transformers
Uses Depth-Anything V2 directly (no ONNX conversion needed!)
"""

import numpy as np
import torch
from PIL import Image
from transformers import AutoImageProcessor, AutoModelForDepthEstimation

class TransformersDepthEstimator:
    """
    Depth estimation using Hugging Face Transformers.

    Loads Depth-Anything V2 directly as a PyTorch model — easier than ONNX,
    no conversion step needed.
    """

    # Short size name -> Hugging Face model repository id.
    _MODEL_MAP = {
        "small": "depth-anything/Depth-Anything-V2-Small-hf",
        "base": "depth-anything/Depth-Anything-V2-Base-hf",
        "large": "depth-anything/Depth-Anything-V2-Large-hf",
    }

    def __init__(self, model_size="small", device=None, cache_dir=None):
        """
        Initialize the depth estimator and load the model.

        Args:
            model_size: "small", "base", or "large"
            device: "cuda", "cpu", or None (auto-detect)
            cache_dir: Where to cache models (default: project folder)

        Raises:
            ValueError: If model_size is not one of the supported sizes.
        """
        # Validate up front, before any expensive work (device probe,
        # directory creation, model download).
        if model_size not in self._MODEL_MAP:
            raise ValueError(f"Invalid model_size. Choose from: {list(self._MODEL_MAP.keys())}")

        self.model_size = model_size

        # Auto-detect device if not specified
        if device is None:
            self.device = "cuda" if torch.cuda.is_available() else "cpu"
        else:
            self.device = device

        # Default cache lives inside the project tree so downloaded weights
        # stay with the checkout instead of the user-global HF cache.
        if cache_dir is None:
            from pathlib import Path
            cache_dir = Path(__file__).parent.parent / "models" / "cache" / "huggingface"
            cache_dir.mkdir(parents=True, exist_ok=True)
            cache_dir = str(cache_dir)

        print(f"[*] Loading Depth-Anything V2 {model_size.upper()} model...")
        print(f"[*] Device: {self.device.upper()}")
        print(f"[*] Cache dir: {cache_dir}")

        repo_id = self._MODEL_MAP[model_size]

        # Load processor and model with custom cache directory
        self.processor = AutoImageProcessor.from_pretrained(
            repo_id,
            cache_dir=cache_dir
        )
        self.model = AutoModelForDepthEstimation.from_pretrained(
            repo_id,
            cache_dir=cache_dir
        )

        # Move model to device and switch to inference mode (disables
        # dropout/batch-norm training behavior).
        self.model.to(self.device)
        self.model.eval()

        print(f"[+] Model loaded successfully!")
        print(f"[+] Cached in: {cache_dir}")

    def predict(self, image):
        """
        Predict depth map for an image.

        Args:
            image: numpy array (H, W, 3) in RGB format, or a PIL Image

        Returns:
            depth: numpy array (H, W) with relative depth values in [0, 1].
                   A constant prediction yields an all-zeros map instead of NaNs.
        """
        # Convert numpy to PIL if needed
        if isinstance(image, np.ndarray):
            image_pil = Image.fromarray(image)
        else:
            image_pil = image

        # Preprocess and move tensors to the model's device.
        inputs = self.processor(images=image_pil, return_tensors="pt")
        inputs = {k: v.to(self.device) for k, v in inputs.items()}

        # Inference without gradient tracking.
        with torch.no_grad():
            outputs = self.model(**inputs)
            predicted_depth = outputs.predicted_depth

        # Resize back to the original resolution. PIL's .size is (W, H)
        # while interpolate expects (H, W), hence the reversal.
        prediction = torch.nn.functional.interpolate(
            predicted_depth.unsqueeze(1),
            size=image_pil.size[::-1],
            mode="bicubic",
            align_corners=False,
        )

        depth = prediction.squeeze().cpu().numpy()

        # Normalize to [0, 1]; guard against a constant map (e.g. a flat
        # input image), which would otherwise divide by zero and emit NaNs.
        depth_min = depth.min()
        depth_range = depth.max() - depth_min
        if depth_range > 0:
            depth = (depth - depth_min) / depth_range
        else:
            depth = np.zeros_like(depth)

        return depth


# Smoke test: load the small model and run one forward pass on random data.
if __name__ == "__main__":
    import time

    print("=" * 70)
    print("  Testing Depth-Anything V2 with Transformers")
    print("=" * 70)

    # Create estimator
    estimator = TransformersDepthEstimator(model_size="small")

    # Create test image (518x518 matches the model's native input size).
    print("[*] Creating test image...")
    test_image = np.random.randint(0, 255, (518, 518, 3), dtype=np.uint8)

    # Predict depth and time the call.
    print("[*] Running depth estimation...")
    start = time.time()
    depth = estimator.predict(test_image)
    elapsed = (time.time() - start) * 1000

    print(f"[+] Depth estimation complete!")
    print(f"[+] Processing time: {elapsed:.2f}ms")
    print(f"[+] Output shape: {depth.shape}")
    print(f"[+] Depth range: [{depth.min():.3f}, {depth.max():.3f}]")

    print("\n" + "=" * 70)
    print("  SUCCESS! Real AI Depth Estimation Working!")
    print("=" * 70)
    print("\nYou can now use real AI depth estimation!")
    print("\nTo use in your app:")
    print("  from backend.utils.transformers_depth import TransformersDepthEstimator")
    print("  estimator = TransformersDepthEstimator('small')")
    print("  depth = estimator.predict(image)")
    print("=" * 70)