rfigueroa commited on
Commit
4ba40aa
verified
1 Parent(s): 693903b

Upload 7 files

Browse files
Files changed (7) hide show
  1. .gitignore +3 -0
  2. config.json +57 -0
  3. handler.py +130 -0
  4. model.safetensors +3 -0
  5. preprocessor_config.json +26 -0
  6. requirements.txt +0 -0
  7. test_hf.py +66 -0
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ .idea
2
+ .env
3
+ .venv
config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_commit_hash": null,
3
+ "architectures": [
4
+ "DepthAnythingForDepthEstimation"
5
+ ],
6
+ "backbone": null,
7
+ "backbone_config": {
8
+ "architectures": [
9
+ "Dinov2Model"
10
+ ],
11
+ "hidden_size": 768,
12
+ "image_size": 518,
13
+ "model_type": "dinov2",
14
+ "num_attention_heads": 12,
15
+ "out_features": [
16
+ "stage3",
17
+ "stage6",
18
+ "stage9",
19
+ "stage12"
20
+ ],
21
+ "out_indices": [
22
+ 3,
23
+ 6,
24
+ 9,
25
+ 12
26
+ ],
27
+ "patch_size": 14,
28
+ "reshape_hidden_states": false,
29
+ "torch_dtype": "float32"
30
+ },
31
+ "backbone_kwargs": null,
32
+ "depth_estimation_type": "metric",
33
+ "fusion_hidden_size": 128,
34
+ "head_hidden_size": 32,
35
+ "head_in_index": -1,
36
+ "initializer_range": 0.02,
37
+ "max_depth": 20,
38
+ "model_type": "depth_anything",
39
+ "neck_hidden_sizes": [
40
+ 96,
41
+ 192,
42
+ 384,
43
+ 768
44
+ ],
45
+ "patch_size": 14,
46
+ "reassemble_factors": [
47
+ 4,
48
+ 2,
49
+ 1,
50
+ 0.5
51
+ ],
52
+ "reassemble_hidden_size": 768,
53
+ "torch_dtype": "float32",
54
+ "transformers_version": null,
55
+ "use_pretrained_backbone": false,
56
+ "use_timm_backbone": false
57
+ }
handler.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ import base64
4
+ import io
5
+ import requests
6
+ import matplotlib.pyplot as plt
7
+ from PIL import Image
8
+ from transformers import AutoImageProcessor, AutoModelForDepthEstimation
9
+ import numpy as np
10
+
11
+
12
class EndpointHandler:
    """Serve depth-estimation requests for a DepthAnything metric-depth model.

    The image processor and model are loaded once in ``__init__``; each call
    accepts an image (URL, base64 payload, or PIL object) and returns either
    depth values at requested pixel coordinates or a base64 PNG visualization.
    """

    def __init__(self, path=""):
        # Model location: explicit argument wins, otherwise MODEL_PATH env var.
        self.model_path = path or os.environ.get("MODEL_PATH", "")
        print(self.model_path)
        self.image_processor = AutoImageProcessor.from_pretrained(self.model_path)
        self.model = AutoModelForDepthEstimation.from_pretrained(self.model_path)

        # Run on the GPU when one is available; inference-only, so eval mode.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = self.model.to(self.device)
        self.model.eval()

    def _load_image(self, data):
        """Extract a PIL RGB image from the request payload.

        Accepts exactly one of:
            'url'   -- image downloaded over HTTP
            'file'  -- base64-encoded image bytes
            'image' -- a ready PIL.Image object

        Raises:
            ValueError: when none of the accepted keys is present.
        """
        if "url" in data:
            response = requests.get(data["url"])
            response.raise_for_status()  # surface HTTP errors to the caller
            # Decode from the fully-downloaded body; response.raw bypasses
            # requests' content-encoding handling and can yield bytes PIL
            # cannot decode (e.g. gzip-compressed responses).
            return Image.open(io.BytesIO(response.content)).convert("RGB")
        if "file" in data:
            image_bytes = base64.b64decode(data["file"])
            return Image.open(io.BytesIO(image_bytes)).convert("RGB")
        if "image" in data:
            return data["image"]
        raise ValueError(
            "No valid image input found. Please provide either 'url', 'file' (base64 encoded image), or 'image' (PIL Image object).")

    def _predict_depth(self, image):
        """Run the model and return an (H, W) numpy depth map at image size."""
        inputs = self.image_processor(images=image, return_tensors="pt")
        inputs = {k: v.to(self.device) for k, v in inputs.items()}

        with torch.no_grad():
            outputs = self.model(**inputs)

        # Upsample the raw prediction back to the original resolution.
        # PIL's image.size is (width, height); interpolate wants (height, width).
        prediction = torch.nn.functional.interpolate(
            outputs.predicted_depth.unsqueeze(1),
            size=image.size[::-1],
            mode="bicubic",
            align_corners=False,
        ).squeeze()
        return prediction.cpu().numpy()

    @staticmethod
    def _render_visualization(normalized_depth):
        """Render a 0-1 depth map as a base64-encoded PNG (plasma colormap)."""
        plt.figure(figsize=(10, 10))
        plt.imshow(normalized_depth, cmap='plasma')
        plt.axis('off')

        buf = io.BytesIO()
        plt.savefig(buf, format='png', bbox_inches='tight', pad_inches=0)
        plt.close()
        buf.seek(0)
        return base64.b64encode(buf.getvalue()).decode('utf-8')

    def __call__(self, data):
        """
        Args:
            data: dictionary with one image key ('url', 'file' base64 string,
                or 'image' PIL object) plus options:
                - 'visualization': bool, return a base64 PNG instead of
                  values (default: False)
                - 'points': sequence of [x, y] pixel coordinates to sample,
                  e.g. [[x1, y1], [x2, y2], ...] (default: [[0, 0]])
        Returns:
            Dict with either 'depths' (list of floats, one per point) or
            'visualization'/'min_depth'/'max_depth'/'format'.
        """
        image = self._load_image(data)
        depth_map = self._predict_depth(image)

        depth_min = float(depth_map.min())
        depth_max = float(depth_map.max())
        # Guard against a constant depth map to avoid division by zero.
        depth_range = (depth_max - depth_min) or 1.0
        normalized_depth = (depth_map - depth_min) / depth_range

        if data.get("visualization", False):
            return {
                "visualization": self._render_visualization(normalized_depth),
                "min_depth": depth_min,
                "max_depth": depth_max,
                "format": "base64_png",
            }

        # np.asarray accepts both JSON lists and ndarrays; the previous code
        # called .astype directly and crashed on plain lists, which is what a
        # JSON request body deserializes to.
        points = np.asarray(data.get("points", [[0, 0]])).astype(int).tolist()
        # depth_map is indexed [row][col] == [y][x]; cast to Python float so
        # the response is JSON-serializable (np.float32 is not).
        return {"depths": [float(depth_map[y][x]) for x, y in points]}
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75c2e06f0d32f1e9daec1a4e8d193e18a338144586e08ac701f34e85fdb29d2f
3
+ size 134
preprocessor_config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "do_pad": false,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "ensure_multiple_of": 14,
7
+ "image_mean": [
8
+ 0.485,
9
+ 0.456,
10
+ 0.406
11
+ ],
12
+ "image_processor_type": "DPTImageProcessor",
13
+ "image_std": [
14
+ 0.229,
15
+ 0.224,
16
+ 0.225
17
+ ],
18
+ "keep_aspect_ratio": true,
19
+ "resample": 3,
20
+ "rescale_factor": 0.00392156862745098,
21
+ "size": {
22
+ "height": 518,
23
+ "width": 518
24
+ },
25
+ "size_divisor": null
26
+ }
requirements.txt ADDED
Binary file (164 Bytes). View file
 
test_hf.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

import requests
import base64
from PIL import Image
import io

# Hugging Face inference endpoint URL
# (e.g. "https://....endpoints.huggingface.cloud")
ENDPOINT_URL = os.environ.get("ENDPOINT_URL", "")
# Hugging Face API token ("hf_...")
API_TOKEN = os.environ.get("API_TOKEN", "")
headers = {
    "Authorization": f"Bearer {API_TOKEN}",
    "Content-Type": "application/json"
}


def main():
    """Send one request to the depth endpoint and report the result."""
    # To send a local image instead of a URL, base64-encode it and pass it
    # under the 'file' key:
    #     image = Image.open("mine.jpeg")
    #     buffered = io.BytesIO()
    #     image.save(buffered, format="JPEG")
    #     payload["file"] = base64.b64encode(buffered.getvalue()).decode("utf-8")

    # Request payload for the endpoint.
    payload = {
        "inputs": {},
        "url": "https://images.unsplash.com/photo-1586023492125-27b2c045efd7?fm=jpg&q=60&w=3000&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxzZWFyY2h8Mnx8aW50ZXJpb3IlMjBkZXNpZ258ZW58MHx8MHx8fDA%3D",
        # "visualization": False,
        # NOTE(review): the handler reads 'points' ([[x, y], ...]); these
        # 'x'/'y' keys appear to be ignored server-side -- confirm the
        # contract before relying on them.
        "x": 80,
        "y": 60,
    }

    # Send the request.
    response = requests.post(ENDPOINT_URL, headers=headers, json=payload)

    # Process the response.
    if response.status_code == 200:
        result = response.json()
        if "visualization" in result:
            # Decode and save the visualization PNG.
            vis_bytes = base64.b64decode(result["visualization"])
            with open("depth_visualization.png", "wb") as f:
                f.write(vis_bytes)
            print("Visualización guardada como 'depth_visualization.png'")
        # The handler returns depth values under the 'depths' key; the
        # previous 'deph' lookup was a typo that always printed None.
        print(f"Profundidad: {result.get('depths')}")
    else:
        print(f"Error: {response.status_code}")
        print(response.text)


if __name__ == "__main__":
    main()