Spaces:
Sleeping
Sleeping
File size: 4,824 Bytes
191a797 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 |
"""
Real AI Depth Estimation using Hugging Face Transformers
Uses Depth-Anything V2 directly (no ONNX conversion needed!)
"""
import numpy as np
import torch
from PIL import Image
from transformers import AutoImageProcessor, AutoModelForDepthEstimation
class TransformersDepthEstimator:
    """
    Depth estimation using Hugging Face Transformers.

    Loads a Depth-Anything V2 checkpoint directly from the Hub via
    AutoModelForDepthEstimation -- easier than ONNX, works directly
    with PyTorch models.
    """

    # Supported model sizes -> Hugging Face Hub repository ids.
    # Class-level constant so the mapping is not rebuilt per instance.
    _MODEL_MAP = {
        "small": "depth-anything/Depth-Anything-V2-Small-hf",
        "base": "depth-anything/Depth-Anything-V2-Base-hf",
        "large": "depth-anything/Depth-Anything-V2-Large-hf",
    }

    def __init__(self, model_size="small", device=None, cache_dir=None):
        """
        Initialize depth estimator.

        Args:
            model_size: "small", "base", or "large"
            device: "cuda", "cpu", or None (auto-detect)
            cache_dir: Where to cache models (default: project folder)

        Raises:
            ValueError: if model_size is not one of the supported sizes.
        """
        # Validate up front, before any printing/loading side effects.
        if model_size not in self._MODEL_MAP:
            raise ValueError(f"Invalid model_size. Choose from: {list(self._MODEL_MAP.keys())}")
        self.model_size = model_size

        # Auto-detect device if not specified
        if device is None:
            self.device = "cuda" if torch.cuda.is_available() else "cpu"
        else:
            self.device = device

        # Default cache lives inside the project tree ("models/cache/huggingface"
        # next to this package) rather than the user's global HF cache.
        if cache_dir is None:
            from pathlib import Path
            cache_path = Path(__file__).parent.parent / "models" / "cache" / "huggingface"
            cache_path.mkdir(parents=True, exist_ok=True)
            cache_dir = str(cache_path)

        print(f"[*] Loading Depth-Anything V2 {model_size.upper()} model...")
        print(f"[*] Device: {self.device.upper()}")
        print(f"[*] Cache dir: {cache_dir}")

        repo_id = self._MODEL_MAP[model_size]

        # Load processor and model with the custom cache directory.
        self.processor = AutoImageProcessor.from_pretrained(
            repo_id,
            cache_dir=cache_dir
        )
        self.model = AutoModelForDepthEstimation.from_pretrained(
            repo_id,
            cache_dir=cache_dir
        )

        # Move to the target device and freeze in inference mode.
        self.model.to(self.device)
        self.model.eval()

        print(f"[+] Model loaded successfully!")
        print(f"[+] Cached in: {cache_dir}")

    def predict(self, image):
        """
        Predict depth map for an image.

        Args:
            image: numpy array (H, W, 3) in RGB format, or a PIL Image.

        Returns:
            depth: numpy array (H, W) with relative depth normalized to
                [0, 1]; all zeros when the prediction is constant.
        """
        # Accept either a numpy array or an already-constructed PIL image.
        if isinstance(image, np.ndarray):
            image_pil = Image.fromarray(image)
        else:
            image_pil = image

        # Preprocess, then move every tensor to the model's device.
        inputs = self.processor(images=image_pil, return_tensors="pt")
        inputs = {k: v.to(self.device) for k, v in inputs.items()}

        # Inference
        with torch.no_grad():
            outputs = self.model(**inputs)
            predicted_depth = outputs.predicted_depth

        # Resize the prediction back to the input resolution.
        # PIL .size is (W, H); interpolate expects (H, W), hence [::-1].
        prediction = torch.nn.functional.interpolate(
            predicted_depth.unsqueeze(1),
            size=image_pil.size[::-1],
            mode="bicubic",
            align_corners=False,
        )

        depth = prediction.squeeze().cpu().numpy()

        # Normalize to [0, 1]; guard against division by zero (NaN output)
        # when the model returns a constant depth map.
        d_min, d_max = depth.min(), depth.max()
        if d_max > d_min:
            depth = (depth - d_min) / (d_max - d_min)
        else:
            depth = np.zeros_like(depth)
        return depth
# Test function: run this module directly to smoke-test the estimator
# (downloads the small model on first run).
if __name__ == "__main__":
    import time  # hoisted: keep all demo imports at the top of the guard

    print("=" * 70)
    print(" Testing Depth-Anything V2 with Transformers")
    print("=" * 70)

    # Create estimator (loads/downloads the model)
    estimator = TransformersDepthEstimator(model_size="small")

    # Create test image: random RGB noise at the model's 518x518 input size
    print("[*] Creating test image...")
    test_image = np.random.randint(0, 255, (518, 518, 3), dtype=np.uint8)

    # Predict depth and time the full preprocess + inference call
    print("[*] Running depth estimation...")
    start = time.time()
    depth = estimator.predict(test_image)
    elapsed = (time.time() - start) * 1000

    print(f"[+] Depth estimation complete!")
    print(f"[+] Processing time: {elapsed:.2f}ms")
    print(f"[+] Output shape: {depth.shape}")
    print(f"[+] Depth range: [{depth.min():.3f}, {depth.max():.3f}]")

    print("\n" + "=" * 70)
    print(" SUCCESS! Real AI Depth Estimation Working!")
    print("=" * 70)
    print("\nYou can now use real AI depth estimation!")
    print("\nTo use in your app:")
    print(" from backend.utils.transformers_depth import TransformersDepthEstimator")
    print(" estimator = TransformersDepthEstimator('small')")
    print(" depth = estimator.predict(image)")
    print("=" * 70)
|