rfigueroa commited on
Commit
4ba40aa
verified
1 Parent(s): 693903b

Upload 7 files

Browse files
Files changed (7) hide show
  1. .gitignore +3 -0
  2. config.json +57 -0
  3. handler.py +130 -0
  4. model.safetensors +3 -0
  5. preprocessor_config.json +26 -0
  6. requirements.txt +0 -0
  7. test_hf.py +66 -0
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ .idea
2
+ .env
3
+ .venv
config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_commit_hash": null,
3
+ "architectures": [
4
+ "DepthAnythingForDepthEstimation"
5
+ ],
6
+ "backbone": null,
7
+ "backbone_config": {
8
+ "architectures": [
9
+ "Dinov2Model"
10
+ ],
11
+ "hidden_size": 768,
12
+ "image_size": 518,
13
+ "model_type": "dinov2",
14
+ "num_attention_heads": 12,
15
+ "out_features": [
16
+ "stage3",
17
+ "stage6",
18
+ "stage9",
19
+ "stage12"
20
+ ],
21
+ "out_indices": [
22
+ 3,
23
+ 6,
24
+ 9,
25
+ 12
26
+ ],
27
+ "patch_size": 14,
28
+ "reshape_hidden_states": false,
29
+ "torch_dtype": "float32"
30
+ },
31
+ "backbone_kwargs": null,
32
+ "depth_estimation_type": "metric",
33
+ "fusion_hidden_size": 128,
34
+ "head_hidden_size": 32,
35
+ "head_in_index": -1,
36
+ "initializer_range": 0.02,
37
+ "max_depth": 20,
38
+ "model_type": "depth_anything",
39
+ "neck_hidden_sizes": [
40
+ 96,
41
+ 192,
42
+ 384,
43
+ 768
44
+ ],
45
+ "patch_size": 14,
46
+ "reassemble_factors": [
47
+ 4,
48
+ 2,
49
+ 1,
50
+ 0.5
51
+ ],
52
+ "reassemble_hidden_size": 768,
53
+ "torch_dtype": "float32",
54
+ "transformers_version": null,
55
+ "use_pretrained_backbone": false,
56
+ "use_timm_backbone": false
57
+ }
handler.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ import base64
4
+ import io
5
+ import requests
6
+ import matplotlib.pyplot as plt
7
+ from PIL import Image
8
+ from transformers import AutoImageProcessor, AutoModelForDepthEstimation
9
+ import numpy as np
10
+
11
+
12
class EndpointHandler:
    """Serve depth-estimation requests for a DepthAnything metric-depth model.

    The image processor and model are loaded once in ``__init__``; each call
    accepts an image (URL, base64 payload, or PIL object) and returns either
    depth values at requested pixel coordinates or a base64 PNG visualization.
    """

    def __init__(self, path=""):
        # Model location: explicit argument wins, otherwise MODEL_PATH env var.
        self.model_path = path or os.environ.get("MODEL_PATH", "")
        print(self.model_path)
        self.image_processor = AutoImageProcessor.from_pretrained(self.model_path)
        self.model = AutoModelForDepthEstimation.from_pretrained(self.model_path)

        # Run on the GPU when one is available; inference-only, so eval mode.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = self.model.to(self.device)
        self.model.eval()

    def _load_image(self, data):
        """Extract a PIL RGB image from the request payload.

        Accepts exactly one of:
            'url'   -- image downloaded over HTTP
            'file'  -- base64-encoded image bytes
            'image' -- a ready PIL.Image object

        Raises:
            ValueError: when none of the accepted keys is present.
        """
        if "url" in data:
            response = requests.get(data["url"])
            response.raise_for_status()  # surface HTTP errors to the caller
            # Decode from the fully-downloaded body; response.raw bypasses
            # requests' content-encoding handling and can yield bytes PIL
            # cannot decode (e.g. gzip-compressed responses).
            return Image.open(io.BytesIO(response.content)).convert("RGB")
        if "file" in data:
            image_bytes = base64.b64decode(data["file"])
            return Image.open(io.BytesIO(image_bytes)).convert("RGB")
        if "image" in data:
            return data["image"]
        raise ValueError(
            "No valid image input found. Please provide either 'url', 'file' (base64 encoded image), or 'image' (PIL Image object).")

    def _predict_depth(self, image):
        """Run the model and return an (H, W) numpy depth map at image size."""
        inputs = self.image_processor(images=image, return_tensors="pt")
        inputs = {k: v.to(self.device) for k, v in inputs.items()}

        with torch.no_grad():
            outputs = self.model(**inputs)

        # Upsample the raw prediction back to the original resolution.
        # PIL's image.size is (width, height); interpolate wants (height, width).
        prediction = torch.nn.functional.interpolate(
            outputs.predicted_depth.unsqueeze(1),
            size=image.size[::-1],
            mode="bicubic",
            align_corners=False,
        ).squeeze()
        return prediction.cpu().numpy()

    @staticmethod
    def _render_visualization(normalized_depth):
        """Render a 0-1 depth map as a base64-encoded PNG (plasma colormap)."""
        plt.figure(figsize=(10, 10))
        plt.imshow(normalized_depth, cmap='plasma')
        plt.axis('off')

        buf = io.BytesIO()
        plt.savefig(buf, format='png', bbox_inches='tight', pad_inches=0)
        plt.close()
        buf.seek(0)
        return base64.b64encode(buf.getvalue()).decode('utf-8')

    def __call__(self, data):
        """
        Args:
            data: dictionary with one image key ('url', 'file' base64 string,
                or 'image' PIL object) plus options:
                - 'visualization': bool, return a base64 PNG instead of
                  values (default: False)
                - 'points': sequence of [x, y] pixel coordinates to sample,
                  e.g. [[x1, y1], [x2, y2], ...] (default: [[0, 0]])
        Returns:
            Dict with either 'depths' (list of floats, one per point) or
            'visualization'/'min_depth'/'max_depth'/'format'.
        """
        image = self._load_image(data)
        depth_map = self._predict_depth(image)

        depth_min = float(depth_map.min())
        depth_max = float(depth_map.max())
        # Guard against a constant depth map to avoid division by zero.
        depth_range = (depth_max - depth_min) or 1.0
        normalized_depth = (depth_map - depth_min) / depth_range

        if data.get("visualization", False):
            return {
                "visualization": self._render_visualization(normalized_depth),
                "min_depth": depth_min,
                "max_depth": depth_max,
                "format": "base64_png",
            }

        # np.asarray accepts both JSON lists and ndarrays; the previous code
        # called .astype directly and crashed on plain lists, which is what a
        # JSON request body deserializes to.
        points = np.asarray(data.get("points", [[0, 0]])).astype(int).tolist()
        # depth_map is indexed [row][col] == [y][x]; cast to Python float so
        # the response is JSON-serializable (np.float32 is not).
        return {"depths": [float(depth_map[y][x]) for x, y in points]}
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75c2e06f0d32f1e9daec1a4e8d193e18a338144586e08ac701f34e85fdb29d2f
3
+ size 134
preprocessor_config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "do_pad": false,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "ensure_multiple_of": 14,
7
+ "image_mean": [
8
+ 0.485,
9
+ 0.456,
10
+ 0.406
11
+ ],
12
+ "image_processor_type": "DPTImageProcessor",
13
+ "image_std": [
14
+ 0.229,
15
+ 0.224,
16
+ 0.225
17
+ ],
18
+ "keep_aspect_ratio": true,
19
+ "resample": 3,
20
+ "rescale_factor": 0.00392156862745098,
21
+ "size": {
22
+ "height": 518,
23
+ "width": 518
24
+ },
25
+ "size_divisor": null
26
+ }
requirements.txt ADDED
Binary file (164 Bytes). View file
 
test_hf.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

import requests
import base64
from PIL import Image
import io

# Hugging Face inference endpoint URL
# (e.g. "https://....endpoints.huggingface.cloud")
ENDPOINT_URL = os.environ.get("ENDPOINT_URL", "")
# Hugging Face API token ("hf_...")
API_TOKEN = os.environ.get("API_TOKEN", "")
headers = {
    "Authorization": f"Bearer {API_TOKEN}",
    "Content-Type": "application/json"
}


def main():
    """Send one request to the depth endpoint and report the result."""
    # To send a local image instead of a URL, base64-encode it and pass it
    # under the 'file' key:
    #     image = Image.open("mine.jpeg")
    #     buffered = io.BytesIO()
    #     image.save(buffered, format="JPEG")
    #     payload["file"] = base64.b64encode(buffered.getvalue()).decode("utf-8")

    # Request payload for the endpoint.
    payload = {
        "inputs": {},
        "url": "https://images.unsplash.com/photo-1586023492125-27b2c045efd7?fm=jpg&q=60&w=3000&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxzZWFyY2h8Mnx8aW50ZXJpb3IlMjBkZXNpZ258ZW58MHx8MHx8fDA%3D",
        # "visualization": False,
        # NOTE(review): the handler reads 'points' ([[x, y], ...]); these
        # 'x'/'y' keys appear to be ignored server-side -- confirm the
        # contract before relying on them.
        "x": 80,
        "y": 60,
    }

    # Send the request.
    response = requests.post(ENDPOINT_URL, headers=headers, json=payload)

    # Process the response.
    if response.status_code == 200:
        result = response.json()
        if "visualization" in result:
            # Decode and save the visualization PNG.
            vis_bytes = base64.b64decode(result["visualization"])
            with open("depth_visualization.png", "wb") as f:
                f.write(vis_bytes)
            print("Visualización guardada como 'depth_visualization.png'")
        # The handler returns depth values under the 'depths' key; the
        # previous 'deph' lookup was a typo that always printed None.
        print(f"Profundidad: {result.get('depths')}")
    else:
        print(f"Error: {response.status_code}")
        print(response.text)


if __name__ == "__main__":
    main()