Muhammad Ahmad Zia committed on
Commit
5df4cbb
·
verified ·
1 Parent(s): 2fb9bd2

Upload 11 files

Browse files
Files changed (12) hide show
  1. .gitattributes +2 -0
  2. Reference_coin.jpeg +0 -0
  3. Test20.jpg +3 -0
  4. Test21.jpg +3 -0
  5. app.py +912 -0
  6. coin_det.pt +3 -0
  7. requirements.txt +10 -0
  8. scalingtestupdated.py +178 -0
  9. u2net.py +525 -0
  10. u2netp.pth +3 -0
  11. yolo11n.pt +3 -0
  12. yolov8x-worldv2.pt +3 -0
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ Test20.jpg filter=lfs diff=lfs merge=lfs -text
37
+ Test21.jpg filter=lfs diff=lfs merge=lfs -text
Reference_coin.jpeg ADDED
Test20.jpg ADDED

Git LFS Details

  • SHA256: 4c272b36a60acf4145cfe0c102c35e90278f9ede3971c63df491b0346ea08fc5
  • Pointer size: 133 Bytes
  • Size of remote file: 13.8 MB
Test21.jpg ADDED

Git LFS Details

  • SHA256: 59b723ad853ddfa43f2bb69396db6616bd4b12e81f5e51ff1df8278d7917aada
  • Pointer size: 132 Bytes
  • Size of remote file: 7.3 MB
app.py ADDED
@@ -0,0 +1,912 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ import os
3
+ import gc
4
+ import base64
5
+ import io
6
+ import time
7
+ import shutil
8
+ import numpy as np
9
+ import torch
10
+ import cv2
11
+ import ezdxf
12
+ from ezdxf.addons.text2path import make_paths_from_str
13
+ from ezdxf import path
14
+ from ezdxf.addons import text2path
15
+ from ezdxf.enums import TextEntityAlignment
16
+ from ezdxf.fonts.fonts import FontFace, get_font_face
17
+ import gradio as gr
18
+ from PIL import Image, ImageEnhance
19
+ from pathlib import Path
20
+ from typing import List, Union
21
+ from ultralytics import YOLOWorld, YOLO
22
+ from ultralytics.engine.results import Results
23
+ from ultralytics.utils.plotting import save_one_box
24
+ from transformers import AutoModelForImageSegmentation
25
+ from torchvision import transforms
26
+ from scalingtestupdated import calculate_scaling_factor
27
+ from shapely.geometry import Polygon, Point, MultiPolygon
28
+ from scipy.interpolate import splprep, splev
29
+ from scipy.ndimage import gaussian_filter1d
30
+ from u2net import U2NETP
31
+
32
+ # ---------------------
33
+ # Create a cache folder for models
34
+ # ---------------------
35
+ CACHE_DIR = os.path.join(os.path.dirname(__file__), ".cache")
36
+ os.makedirs(CACHE_DIR, exist_ok=True)
37
+
38
+ # ---------------------
39
+ # Custom Exceptions
40
+ # ---------------------
41
class DrawerNotDetectedError(Exception):
    """Signals that no drawer could be located in the input image."""
44
+
45
class ReferenceBoxNotDetectedError(Exception):
    """Signals that the reference coin could not be located in the image."""
48
+
49
class BoundaryOverlapError(Exception):
    """Signals that the optional boundary is too small and overlaps the inner contours."""
52
+
53
class TextOverlapError(Exception):
    """Signals that the annotation text overlaps the inner contours (0.75 margin included)."""
56
class boundary_issue(Exception):
    """Signals that boundary dimensions were supplied while the rectangular boundary option is "No".

    The lower-case name is kept as-is because callers reference it by this name.
    """
58
+ # ---------------------
59
+ # Global Model Initialization with caching and print statements
60
+ # ---------------------
61
def _cache_model_file(filename: str, label: str) -> str:
    """Return the path of ``filename`` inside CACHE_DIR, copying it there on first use.

    The bundled weight file is looked up next to this module (the same anchor
    CACHE_DIR uses) rather than in the current working directory, so start-up
    does not depend on where the process was launched from.

    Args:
        filename: Name of the weight file shipped alongside this module.
        label: Human-readable model name used in the progress message.

    Returns:
        The path of the cached copy.
    """
    cached_path = os.path.join(CACHE_DIR, filename)
    if not os.path.exists(cached_path):
        print(f"Caching {label} to", cached_path)
        shutil.copy(os.path.join(os.path.dirname(__file__), filename), cached_path)
    return cached_path


# Drawer detector: YOLOWorld restricted to the single open-vocabulary class "box".
print("Loading YOLOWorld model...")
start_time = time.time()
yolo_model_path = _cache_model_file("yolov8x-worldv2.pt", "YOLOWorld model")
drawer_detector_global = YOLOWorld(yolo_model_path)
drawer_detector_global.set_classes(["box"])
print("YOLOWorld model loaded in {:.2f} seconds".format(time.time() - start_time))

# Reference (coin) detector.
print("Loading YOLO reference model...")
start_time = time.time()
reference_model_path = _cache_model_file("coin_det.pt", "YOLO reference model")
reference_detector_global = YOLO(reference_model_path)
print("YOLO reference model loaded in {:.2f} seconds".format(time.time() - start_time))

# U2NETP: used for background removal on the reference crop.
print("Loading U²-Net model for reference background removal (U2NETP)...")
start_time = time.time()
u2net_model_path = _cache_model_file("u2netp.pth", "U²-Net model")
u2net_global = U2NETP(3, 1)
u2net_global.load_state_dict(torch.load(u2net_model_path, map_location="cpu"))
device = "cpu"  # all models in this module are placed on the CPU
u2net_global.to(device)
u2net_global.eval()
print("U²-Net model loaded in {:.2f} seconds".format(time.time() - start_time))

# BiRefNet: used for background removal on the main image; fetched through
# transformers with CACHE_DIR as its cache directory.
print("Loading BiRefNet model...")
start_time = time.time()
birefnet_global = AutoModelForImageSegmentation.from_pretrained(
    "zhengpeng7/BiRefNet", trust_remote_code=True, cache_dir=CACHE_DIR
)
torch.set_float32_matmul_precision("high")
birefnet_global.to(device)
birefnet_global.eval()
print("BiRefNet model loaded in {:.2f} seconds".format(time.time() - start_time))

# Preprocessing applied to images before they are fed to BiRefNet.
transform_image_global = transforms.Compose([
    transforms.Resize((1024, 1024)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
109
+
110
+ # ---------------------
111
+ # Model Reload Function (if needed)
112
+ # ---------------------
113
def unload_and_reload_models():
    """Discard the four global models and replace them with freshly loaded ones.

    The old instances are deleted and garbage-collected first, then every model
    is rebuilt from the files in CACHE_DIR (BiRefNet through ``from_pretrained``)
    and the module-level globals are rebound to the new objects.
    """
    global drawer_detector_global, reference_detector_global, birefnet_global, u2net_global
    print("Reloading models...")
    reload_started = time.time()

    # Release the old instances before loading the replacements.
    del drawer_detector_global, reference_detector_global, birefnet_global, u2net_global
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    # NOTE(review): the nesting of this second collect could not be recovered
    # from the listing; it is run unconditionally here.
    gc.collect()

    fresh_drawer = YOLOWorld(os.path.join(CACHE_DIR, "yolov8x-worldv2.pt"))
    fresh_drawer.set_classes(["box"])

    fresh_reference = YOLO(os.path.join(CACHE_DIR, "coin_det.pt"))

    fresh_birefnet = AutoModelForImageSegmentation.from_pretrained(
        "zhengpeng7/BiRefNet", trust_remote_code=True, cache_dir=CACHE_DIR
    )
    fresh_birefnet.to(device)
    fresh_birefnet.eval()

    fresh_u2net = U2NETP(3, 1)
    u2net_weights = torch.load(os.path.join(CACHE_DIR, "u2netp.pth"), map_location="cpu")
    fresh_u2net.load_state_dict(u2net_weights)
    fresh_u2net.to(device)
    fresh_u2net.eval()

    # Rebind the globals only after every model has loaded.
    drawer_detector_global = fresh_drawer
    reference_detector_global = fresh_reference
    birefnet_global = fresh_birefnet
    u2net_global = fresh_u2net
    print("Models reloaded in {:.2f} seconds".format(time.time() - reload_started))
139
+
140
+ # ---------------------
141
+ # Helper Function: resize_img (defined once)
142
+ # ---------------------
143
def resize_img(img: np.ndarray, resize_dim):
    """Resize an image array with PIL and return the result as a new array.

    ``resize_dim`` is forwarded unchanged to ``Image.resize``.
    """
    pil_img = Image.fromarray(img)
    resized = pil_img.resize(resize_dim)
    return np.array(resized)
145
+
146
+ # ---------------------
147
+ # Other Helper Functions for Detection & Processing
148
+ # ---------------------
149
def yolo_detect(image: Union[str, Path, int, Image.Image, list, tuple, np.ndarray, torch.Tensor]) -> np.ndarray:
    """Run the global drawer detector and return the crop produced by ``save_one_box``.

    Raises:
        DrawerNotDetectedError: if the detector returns no results or no boxes.
    """
    started = time.time()
    results: List[Results] = drawer_detector_global.predict(image)

    first = results[0] if results else None
    if first is None or len(first.boxes) == 0:
        raise DrawerNotDetectedError("Drawer not detected in the image.")

    print("Drawer detection completed in {:.2f} seconds".format(time.time() - started))
    boxes_xyxy = first.cpu().boxes.xyxy
    return save_one_box(boxes_xyxy, im=first.orig_img, save=False)
156
+
157
def detect_reference_square(img: np.ndarray):
    """Run the global reference detector (confidence threshold 0.3).

    Returns:
        A pair ``(crop, box)``: the crop produced by ``save_one_box`` and the
        first row of the detected ``xyxy`` boxes.

    Raises:
        ReferenceBoxNotDetectedError: if the detector returns no results or no boxes.
    """
    started = time.time()
    res = reference_detector_global.predict(img, conf=0.3)

    first = res[0] if res else None
    if first is None or len(first.boxes) == 0:
        raise ReferenceBoxNotDetectedError("Reference Coin not detected in the image.")

    print("Reference detection completed in {:.2f} seconds".format(time.time() - started))
    reference_crop = save_one_box(first.cpu().boxes.xyxy, first.orig_img, save=False)
    first_box = first.cpu().boxes.xyxy[0]
    return reference_crop, first_box
167
+
168
+ # Use U2NETP for reference background removal.
169
def remove_bg_u2netp(image: np.ndarray) -> np.ndarray:
    """Compute a uint8 mask for ``image`` with the global U2NETP model.

    The image is resized to 320x320 and normalised, the first model output is
    min-max scaled to [0, 1], resized back to the input width and height, and
    converted to the 0-255 range.
    """
    started = time.time()
    pil_input = Image.fromarray(image)

    preprocess = transforms.Compose([
        transforms.Resize((320, 320)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    batch = preprocess(pil_input).unsqueeze(0).to("cpu")

    with torch.no_grad():
        outputs = u2net_global(batch)
    saliency = outputs[0]

    # Min-max normalise; the epsilon guards against a constant prediction.
    lowest, highest = saliency.min(), saliency.max()
    saliency = (saliency - lowest) / (highest - lowest + 1e-8)

    mask = saliency.squeeze().cpu().numpy()
    mask = cv2.resize(mask, (pil_input.width, pil_input.height))
    mask = (mask * 255).astype(np.uint8)
    print("U2NETP background removal completed in {:.2f} seconds".format(time.time() - started))
    return mask
187
+
188
+ # Use BiRefNet for main object background removal.
189
def remove_bg(image: np.ndarray) -> np.ndarray:
    """Compute a mask for ``image`` with the global BiRefNet model.

    The returned array is the sigmoid of the last model output, resized so that
    the longer side of the input maps to 1024 pixels (aspect ratio kept).
    """
    started = time.time()
    pil_input = Image.fromarray(image)
    batch = transform_image_global(pil_input).unsqueeze(0).to("cpu")

    with torch.no_grad():
        predictions = birefnet_global(batch)[-1].sigmoid().cpu()
    mask_pil = transforms.ToPILImage()(predictions[0].squeeze())

    # Scale so that the longer input side becomes 1024 pixels.
    width, height = pil_input.size
    ratio = 1024 / max(pil_input.size)
    target_size = (int(width * ratio), int(height * ratio))

    result = np.array(mask_pil.resize(target_size))
    print("BiRefNet background removal completed in {:.2f} seconds".format(time.time() - started))
    return result
202
+
203
def make_square(img: np.ndarray):
    """Pad ``img`` to a square by replicating its edge pixels.

    The shorter axis is padded on both sides; when the difference is odd the
    extra row/column goes to the bottom/right. Three-dimensional inputs keep
    their last axis unpadded.
    """
    rows, cols = img.shape[:2]
    side = max(rows, cols)

    # divmod splits the deficit into an even half and a 0/1 remainder.
    top, extra_rows = divmod(side - rows, 2)
    left, extra_cols = divmod(side - cols, 2)

    pad_spec = [(top, top + extra_rows), (left, left + extra_cols)]
    if len(img.shape) == 3:
        pad_spec.append((0, 0))
    return np.pad(img, pad_spec, mode="edge")
218
+
219
def shrink_bbox(image: np.ndarray, shrink_factor: float):
    """Return a centred crop whose sides are the originals scaled by ``shrink_factor``.

    The crop window is clamped to the image bounds.
    """
    rows, cols = image.shape[:2]
    mid_x, mid_y = cols // 2, rows // 2

    half_w = int(cols * shrink_factor) // 2
    half_h = int(rows * shrink_factor) // 2

    left, right = max(mid_x - half_w, 0), min(mid_x + half_w, cols)
    top, bottom = max(mid_y - half_h, 0), min(mid_y + half_h, rows)
    return image[top:bottom, left:right]
229
+
230
def exclude_scaling_box(image: np.ndarray, bbox: np.ndarray, orig_size: tuple, processed_size: tuple, expansion_factor: float = 1.2) -> np.ndarray:
    """Zero out the (expanded) reference box region of ``image`` in place.

    ``bbox`` is given as (x_min, y_min, x_max, y_max) relative to ``orig_size``
    and is rescaled to ``processed_size``; both sizes are indexed as
    (height, width). The box is then grown around its centre by
    ``expansion_factor``, clamped to the image, and set to 0.

    Returns:
        The same ``image`` object, modified in place.
    """
    left, top, right, bottom = (int(v) for v in bbox)

    ratio_x = processed_size[1] / orig_size[1]
    ratio_y = processed_size[0] / orig_size[0]
    left, right = int(left * ratio_x), int(right * ratio_x)
    top, bottom = int(top * ratio_y), int(bottom * ratio_y)

    # Half of the extra width/height added on each side by the expansion.
    margin_x = (expansion_factor - 1) * (right - left) / 2
    margin_y = (expansion_factor - 1) * (bottom - top) / 2

    col_start = max(0, int(left - margin_x))
    col_stop = min(image.shape[1], int(right + margin_x))
    row_start = max(0, int(top - margin_y))
    row_stop = min(image.shape[0], int(bottom + margin_y))

    image[row_start:row_stop, col_start:col_stop] = 0
    return image
246
+
247
def resample_contour(contour, num_points=1000, smoothing_factor=5, spline_degree=3):
    """Fit a smoothing spline to a contour and return evenly parameterised points.

    Args:
        contour: Array indexed as ``contour[:, 0, :]`` to obtain (x, y) rows
            (the layout the callers in this file pass in).
        num_points: Number of points sampled along the fitted spline.
        smoothing_factor: Value passed to ``splprep`` as ``s``.
        spline_degree: Spline degree, passed to ``splprep`` as ``k``. The
            default of 3 equals ``splprep``'s own default, so existing callers
            get the same fit as before.

    Returns:
        Array of shape ``(num_points, 2)`` holding the smoothed x/y coordinates.

    Raises:
        ValueError: if the contour has fewer than ``spline_degree + 1`` points.
    """
    if len(contour) < spline_degree + 1:
        raise ValueError(f"Contour must have at least {spline_degree + 1} points, but has {len(contour)} points.")

    points = contour[:, 0, :]
    # Pass the degree explicitly so the length check above and the fit agree.
    tck, _ = splprep([points[:, 0], points[:, 1]], s=smoothing_factor, k=spline_degree)

    u = np.linspace(0, 1, num_points)
    sampled_x, sampled_y = splev(u, tck)

    # Light Gaussian smoothing of each coordinate sequence.
    smoothed_x = gaussian_filter1d(sampled_x, sigma=1)
    smoothed_y = gaussian_filter1d(sampled_y, sigma=1)
    return np.array([smoothed_x, smoothed_y]).T
260
+
261
+ # ---------------------
262
+ # Outline extraction: find the external contours of a binary mask
263
+ # ---------------------
264
def extract_outlines(binary_image: np.ndarray) -> (np.ndarray, list):
    """Find the external contours of ``binary_image`` and render them.

    Returns:
        A pair ``(outline_image, contours)``: an image the same shape as the
        input in which the contours are drawn 2 px thick and the result is then
        inverted with ``bitwise_not``, plus the contours returned by OpenCV.
    """
    contours, _hierarchy = cv2.findContours(
        binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE
    )
    canvas = np.zeros_like(binary_image)
    cv2.drawContours(canvas, contours, -1, (255), thickness=2)
    inverted = cv2.bitwise_not(canvas)
    return inverted, contours
269
+
270
+ # ---------------------
271
+ # Functions for Finger Cut Clearance
272
+ # ---------------------
273
def union_tool_and_circle(tool_polygon: Polygon, center_inch, circle_diameter=1.0):
    """Return the union of ``tool_polygon`` with a circle centred at ``center_inch``.

    The circle is built by buffering a point with half of ``circle_diameter``.
    """
    finger_circle = Point(center_inch).buffer(circle_diameter / 2.0, resolution=64)
    return tool_polygon.union(finger_circle)
278
+
279
def build_tool_polygon(points_inch):
    """Wrap a sequence of points in a shapely ``Polygon``."""
    tool_polygon = Polygon(points_inch)
    return tool_polygon
281
+
282
def polygon_to_exterior_coords(poly: Polygon):
    """Return the exterior ring coordinates of ``poly`` as a list.

    For a ``MultiPolygon`` the member with the largest area is used. An empty
    list is returned when the exterior is falsy.
    """
    target = poly
    if target.geom_type == "MultiPolygon":
        target = max(target.geoms, key=lambda part: part.area)

    ring = target.exterior
    if not ring:
        return []
    return list(ring.coords)
289
+
290
+
291
+
292
+
293
+ # def place_finger_cut_adjusted(tool_polygon, points_inch, existing_centers, all_polygons, circle_diameter=1, min_gap=1, max_attempts=500): #1st best
294
+ # needed_center_distance = circle_diameter + min_gap
295
+ # radius = circle_diameter / 2.0
296
+ # import random
297
+ # for _ in range(max_attempts):
298
+ # idx = random.randint(0, len(points_inch) - 1)
299
+ # cx, cy = points_inch[idx]
300
+
301
+ # # Check if this point is too close to an existing center
302
+ # too_close = any(np.hypot(cx - ex_x, cy - ex_y) < needed_center_distance for ex_x, ex_y in existing_centers)
303
+ # if too_close:
304
+ # continue
305
+
306
+ # # Create the finger cut circle and try adding it to the tool
307
+ # circle_poly = Point((cx, cy)).buffer(radius, resolution=64)
308
+ # union_poly = tool_polygon.union(circle_poly)
309
+
310
+ # # Check for overlap and spacing with other tools
311
+ # overlap_with_others = False
312
+ # too_close_to_others = False
313
+
314
+ # for poly in all_polygons:
315
+ # if poly.equals(tool_polygon):
316
+ # continue # Skip comparing the tool to itself
317
+
318
+ # if union_poly.buffer(min_gap).intersects(poly) > 1e-6:
319
+ # overlap_with_others = True
320
+ # break
321
+
322
+ # if circle_poly.buffer(min_gap).intersects(poly) > 1e-6:
323
+ # too_close_to_others = True
324
+ # break
325
+
326
+ # if overlap_with_others or too_close_to_others:
327
+ # continue
328
+
329
+ # existing_centers.append((cx, cy))
330
+ # return union_poly, (cx, cy)
331
+
332
+ # print("Warning: Could not place a finger cut circle meeting all spacing requirements.")
333
+ # return None, None
334
+
335
+ import numpy as np
336
+ from shapely.geometry import Point
337
+
338
+
339
+ # def place_finger_cut_adjusted(tool_polygon, points_inch, existing_centers, all_polygons, circle_diameter=1.0, min_gap=0.35, max_attempts=2000): #Best best
340
+ # import random
341
+ # import numpy as np
342
+ # from shapely.geometry import Point
343
+
344
+ # needed_center_distance = circle_diameter + min_gap
345
+ # radius = circle_diameter / 2.0
346
+ # attempts = 0
347
+ # indices = list(range(len(points_inch)))
348
+ # random.shuffle(indices) # Shuffle indices for randomness
349
+
350
+ # # Try a grid of adjustments around each candidate point
351
+ # adjustments = list(np.linspace(-0.15, 0.10, 7)) # More adjustment options
352
+
353
+ # for i in indices:
354
+ # if attempts >= max_attempts:
355
+ # break
356
+
357
+ # cx, cy = points_inch[i]
358
+
359
+ # # Try small adjustments around the chosen candidate
360
+ # for dx in adjustments:
361
+ # for dy in adjustments:
362
+ # attempts += 1
363
+ # if attempts >= max_attempts:
364
+ # break
365
+
366
+ # candidate_center = (cx + dx, cy + dy)
367
+
368
+ # # Check distance from already placed centers
369
+ # too_close_to_existing = False
370
+ # for ex, ey in existing_centers:
371
+ # if np.hypot(candidate_center[0] - ex, candidate_center[1] - ey) < needed_center_distance:
372
+ # too_close_to_existing = True
373
+ # break
374
+
375
+ # if too_close_to_existing:
376
+ # continue
377
+
378
+ # # Create circle polygon for this candidate
379
+ # circle_poly = Point(candidate_center).buffer(radius, resolution=64)
380
+
381
+ # # Create the union with the tool polygon
382
+ # union_poly = tool_polygon.union(circle_poly)
383
+
384
+ # # Buffer the circle to check minimum gap requirements
385
+ # circle_buffer = circle_poly.buffer(min_gap, resolution=32)
386
+ # coords = polygon_to_exterior_coords(union_poly)
387
+
388
+ # # Check against all other polygons for overlap or proximity issues
389
+ # overlap = False
390
+ # for poly in all_polygons:
391
+ # if poly == tool_polygon:
392
+ # continue # Skip comparing to self
393
+ # if len(coords) < 4:
394
+ # # It's degenerate or not a valid polygon for your purposes; skip
395
+ # break
396
+
397
+ # # Check if the union overlaps with any other polygon
398
+ # if union_poly.intersects(poly):
399
+ # overlap = True
400
+ # break
401
+
402
+ # # Check if the buffered circle (circle + min_gap) intersects with any other polygon
403
+ # if circle_buffer.intersects(poly):
404
+ # overlap = True
405
+ # break
406
+
407
+ # if not overlap:
408
+ # # If candidate passes all checks, accept it
409
+ # existing_centers.append(candidate_center)
410
+ # return union_poly, candidate_center
411
+
412
+ # print(f"Warning: Could not place a finger cut circle after {attempts} attempts. Consider adjusting parameters.")
413
+ # return None, None
414
+
415
+
416
def place_finger_cut_adjusted(tool_polygon, points_inch, existing_centers, all_polygons, circle_diameter=1.0, min_gap=0.25, max_attempts=100):
    """Try to attach a finger-cut circle to ``tool_polygon`` at a valid location.

    Contour points are visited in random order. Around each one a 10x10 grid of
    offsets in [-0.1, 0.1] is tried. A candidate centre is accepted when it is
    at least ``circle_diameter + min_gap`` away from every centre in
    ``existing_centers`` and when the tool/circle union, both as-is and
    buffered by ``min_gap``, does not intersect any other polygon in
    ``all_polygons``.

    Args:
        tool_polygon: Polygon of the tool the circle is merged into.
        points_inch: Candidate (x, y) centre points, in inches.
        existing_centers: Centres already placed; the accepted centre is
            appended to this list in place.
        all_polygons: Polygons of the other tools (entries equal to
            ``tool_polygon`` are ignored).
        circle_diameter: Diameter of the finger-cut circle.
        min_gap: Minimum clearance to other tools and between circles.
        max_attempts: Upper bound on the number of contour points tried.

    Returns:
        ``(union_polygon, centre)`` on success, otherwise ``(None, None)``.
    """
    import random

    needed_center_distance = circle_diameter + min_gap

    # Loop-invariant work hoisted out of the search loops.
    offsets = np.linspace(-0.1, 0.1, 10)
    other_polygons = [poly for poly in all_polygons if not (poly == tool_polygon)]

    indices = list(range(len(points_inch)))
    random.shuffle(indices)  # visit candidate points in random order

    # NOTE(review): the nesting of the attempt counter could not be recovered
    # from the listing; it is counted once per contour point here - confirm.
    for attempts, idx in enumerate(indices):
        if attempts >= max_attempts:
            break
        cx, cy = points_inch[idx]

        for dx in offsets:
            for dy in offsets:
                candidate_center = (cx + dx, cy + dy)

                # Keep the required spacing from circles placed earlier.
                if any(
                    np.hypot(candidate_center[0] - ex, candidate_center[1] - ey) < needed_center_distance
                    for ex, ey in existing_centers
                ):
                    continue

                # Fix: forward circle_diameter; previously the helper's default
                # of 1.0 was always used and the argument had no effect.
                union_poly = union_tool_and_circle(tool_polygon, candidate_center, circle_diameter)

                if other_polygons:
                    # Buffer once per candidate rather than once per polygon.
                    clearance_zone = union_poly.buffer(min_gap)
                    if any(
                        union_poly.intersects(other) or clearance_zone.intersects(other)
                        for other in other_polygons
                    ):
                        continue

                existing_centers.append(candidate_center)
                return union_poly, candidate_center

    print("Warning: Could not place a finger cut circle meeting all spacing requirements.")
    return None, None
453
+ # ---------------------
454
+ # DXF Spline and Boundary Functions
455
+ # ---------------------
456
def save_dxf_spline(inflated_contours, scaling_factor, height, finger_clearance=False):
    """Convert contours into DXF splines on the "TOOLS" layer.

    Each contour is resampled, mapped to inches (x scaled, y flipped against
    ``height`` and scaled), closed if its end points differ by more than 1e-2,
    optionally merged with a finger-cut circle, and added to a new DXF document
    as a degree-3 spline. Contours that raise ``ValueError`` are reported and
    skipped.

    Returns:
        A pair ``(doc, polygons)``: the ezdxf document and the list of tool
        polygons (in inches) that were written to it.
    """
    spline_degree = 3

    doc = ezdxf.new(units=0)
    doc.units = ezdxf.units.IN
    doc.header["$INSUNITS"] = ezdxf.units.IN
    msp = doc.modelspace()

    placed_centers = []
    tool_polygons = []
    for contour in inflated_contours:
        try:
            smooth_points = resample_contour(contour)
            points_inch = [
                (px * scaling_factor, (height - py) * scaling_factor)
                for px, py in smooth_points
            ]
            if len(points_inch) < 3:
                continue

            # Close the ring explicitly when the end points do not coincide.
            gap = np.linalg.norm(np.array(points_inch[0]) - np.array(points_inch[-1]))
            if gap > 1e-2:
                points_inch.append(points_inch[0])

            tool_polygon = build_tool_polygon(points_inch)
            if finger_clearance:
                merged, _center = place_finger_cut_adjusted(
                    tool_polygon, points_inch, placed_centers, tool_polygons
                )
                if merged is not None:
                    tool_polygon = merged

            outline = polygon_to_exterior_coords(tool_polygon)
            if len(outline) < 3:
                continue

            msp.add_spline(outline, degree=spline_degree, dxfattribs={"layer": "TOOLS"})
            tool_polygons.append(tool_polygon)
        except ValueError as e:
            print(f"Skipping contour: {e}")
    return doc, tool_polygons
486
+
487
+
488
+
489
def add_rectangular_boundary(doc, polygons_inch, boundary_length, boundary_width, offset_unit, annotation_text="", image_height_in=None, image_width_in=None):
    """Add a rectangular boundary, centred on the tool polygons, to the DXF document.

    The rectangle is validated before anything is drawn, so a rejected request
    leaves ``doc`` unchanged.

    Args:
        doc: ezdxf document that receives the boundary on the "BOUNDARY" layer.
        polygons_inch: Tool polygons in inches; only their ``bounds`` are read.
        boundary_length: Boundary extent along y, in ``offset_unit``.
        boundary_width: Boundary extent along x, in ``offset_unit``.
        offset_unit: "mm" (case-insensitive) or anything else for inches.
        annotation_text: When non-blank, the top/bottom clearance grows from
            0.25 to 0.75 and the text-overlap check is applied.
        image_height_in: Unused; kept for interface compatibility.
        image_width_in: Unused; kept for interface compatibility.

    Returns:
        The boundary as a shapely polygon, or ``None`` when there are no tool
        polygons.

    Raises:
        BoundaryOverlapError: if the boundary cannot contain the tools plus the
            clearance margins.
        TextOverlapError: if annotation text is requested and its band comes
            within 0.75 of the lowest tool contour.
    """
    from shapely.geometry import Polygon as ShapelyPolygon

    # Convert to inches. In "mm" mode, values below 50 are multiplied by 25.4
    # before the division, so they pass through numerically unchanged.
    # NOTE(review): presumably such values are taken to be inches already - confirm.
    if offset_unit.lower() == "mm":
        if boundary_length < 50:
            boundary_length = boundary_length * 25.4
        if boundary_width < 50:
            boundary_width = boundary_width * 25.4
        boundary_length_in = boundary_length / 25.4
        boundary_width_in = boundary_width / 25.4
    else:
        boundary_length_in = boundary_length
        boundary_width_in = boundary_width

    # Bounding box of all inner tool contours.
    min_x = min_y = float("inf")
    max_x = max_y = -float("inf")
    for poly in polygons_inch:
        b = poly.bounds
        min_x = min(min_x, b[0])
        min_y = min(min_y, b[1])
        max_x = max(max_x, b[2])
        max_y = max(max_y, b[3])
    if min_x == float("inf"):
        print("No tool polygons found, skipping boundary.")
        return None

    inner_width = max_x - min_x
    inner_length = max_y - min_y

    # Clearance margins; extra room at top/bottom when text is annotated.
    clearance_side = 0.25
    clearance_tb = 0.75 if annotation_text.strip() else 0.25

    # Rectangle centred on the inner bounding box.
    center_x = (min_x + max_x) / 2
    center_y = (min_y + max_y) / 2
    left = center_x - boundary_width_in / 2
    right = center_x + boundary_width_in / 2
    bottom = center_y - boundary_length_in / 2
    top = center_y + boundary_length_in / 2

    rect_coords = [(left, bottom), (right, bottom), (right, top), (left, top), (left, bottom)]
    boundary_polygon = ShapelyPolygon(rect_coords)

    # Validate first: previously the rectangle was written to the document
    # before these checks could raise.
    too_small = (
        boundary_width_in < inner_width + 2 * clearance_side
        or boundary_length_in < inner_length + 2 * clearance_tb
    )
    if too_small:
        raise BoundaryOverlapError("Error: The specified boundary dimensions are too small and overlap with the inner contours. Please provide larger values.")

    text_top = boundary_polygon.bounds[1] + 1
    if annotation_text.strip() and text_top > min_y - 0.75:
        raise TextOverlapError("Error: The text is too close to the inner contours. Please increase boundary length.")

    msp = doc.modelspace()
    msp.add_lwpolyline(rect_coords, close=True, dxfattribs={"layer": "BOUNDARY"})
    return boundary_polygon
550
+
551
def draw_polygons_inch(polygons_inch, image_rgb, scaling_factor, image_height, color=(0,0,255), thickness=2):
    """Draw every polygon (and every member of a MultiPolygon) onto ``image_rgb``."""
    for poly in polygons_inch:
        # Treat a plain polygon as a one-element collection.
        parts = poly.geoms if poly.geom_type == "MultiPolygon" else (poly,)
        for part in parts:
            draw_single_polygon(part, image_rgb, scaling_factor, image_height, color, thickness)
558
+
559
def draw_single_polygon(poly, image_rgb, scaling_factor, image_height, color=(0,0,255), thickness=2):
    """Draw the exterior of ``poly`` on ``image_rgb`` as a closed anti-aliased polyline.

    Coordinates are divided by ``scaling_factor`` to get pixels and the y axis
    is flipped against ``image_height``. Rings with fewer than 3 points are
    ignored.
    """
    ring = list(poly.exterior.coords)
    if len(ring) < 3:
        return

    pixel_points = np.array(
        [
            [int(x_in / scaling_factor), int(image_height - (y_in / scaling_factor))]
            for (x_in, y_in) in ring
        ],
        dtype=np.int32,
    )
    cv2.polylines(image_rgb, [pixel_points], isClosed=True, color=color, thickness=thickness, lineType=cv2.LINE_AA)
570
+
571
+ # ---------------------
572
+ # Main Predict Function with Finger Cut Clearance, Boundary Box, Annotation and Sharpness Enhancement
573
+ # ---------------------
574
+ def predict(
575
+ image: Union[str, bytes, np.ndarray],
576
+ offset_value: float,
577
+ offset_unit: str, # "mm" or "inches"
578
+ finger_clearance: str, # "Yes" or "No"
579
+ add_boundary: str, # "Yes" or "No"
580
+ boundary_length: float,
581
+ boundary_width: float,
582
+ annotation_text: str
583
+ ):
584
+ overall_start = time.time()
585
+ # Convert image to NumPy array if needed
586
+ if isinstance(image, str):
587
+ if os.path.exists(image):
588
+ image = np.array(Image.open(image).convert("RGB"))
589
+ else:
590
+ try:
591
+ image = np.array(Image.open(io.BytesIO(base64.b64decode(image))).convert("RGB"))
592
+ except Exception:
593
+ raise ValueError("Invalid base64 image data")
594
+
595
+ # Apply brightness and sharpness enhancement
596
+ if isinstance(image, np.ndarray):
597
+ pil_image = Image.fromarray(image)
598
+ enhanced_image = ImageEnhance.Sharpness(pil_image).enhance(1.5)
599
+ image = np.array(enhanced_image)
600
+
601
+ # ---------------------
602
+ # 1) Detect the drawer with YOLOWorld (or use original image if not detected)
603
+ # ---------------------
604
+ drawer_detected = True
605
+ try:
606
+ t = time.time()
607
+ drawer_img = yolo_detect(image)
608
+ print("Drawer detection completed in {:.2f} seconds".format(time.time() - t))
609
+ except DrawerNotDetectedError as e:
610
+ print(f"Drawer not detected: {e}, using original image.")
611
+ drawer_detected = False
612
+ drawer_img = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
613
+
614
+ # Process the image (either cropped drawer or original)
615
+ t = time.time()
616
+ if drawer_detected:
617
+ # For detected drawers: shrink and square
618
+ shrunked_img = make_square(shrink_bbox(drawer_img, 0.90))
619
+ else:
620
+ # For non-drawer images: keep original dimensions
621
+ shrunked_img = drawer_img # Already in BGR format from above
622
+ del drawer_img
623
+ gc.collect()
624
+ print("Image processing completed in {:.2f} seconds".format(time.time() - t))
625
+
626
+ # ---------------------
627
+ # 2) Detect the reference box with YOLO (now works on either cropped or original image)
628
+ # ---------------------
629
+ try:
630
+ t = time.time()
631
+ reference_obj_img, scaling_box_coords = detect_reference_square(shrunked_img)
632
+ print("Reference coin detection completed in {:.2f} seconds".format(time.time() - t))
633
+ except ReferenceBoxNotDetectedError as e:
634
+ return None, None, None, None, f"Error: {str(e)}"
635
+
636
+ # ---------------------
637
+ # 3) Remove background of the reference box to compute scaling factor
638
+ # ---------------------
639
+ t = time.time()
640
+ reference_obj_img = make_square(reference_obj_img)
641
+ reference_square_mask = remove_bg_u2netp(reference_obj_img)
642
+ reference_square_mask= resize_img(reference_square_mask,(reference_obj_img.shape[1],reference_obj_img.shape[0]))
643
+ print("Reference image processing completed in {:.2f} seconds".format(time.time() - t))
644
+
645
+ t = time.time()
646
+ try:
647
+ cv2.imwrite("mask.jpg", cv2.cvtColor(reference_obj_img, cv2.COLOR_RGB2GRAY))
648
+ scaling_factor = calculate_scaling_factor(
649
+ target_image=reference_square_mask,
650
+ reference_obj_size_mm=0.955,
651
+ feature_detector="ORB",
652
+ )
653
+ except ZeroDivisionError:
654
+ scaling_factor = None
655
+ print("Error calculating scaling factor: Division by zero")
656
+ except Exception as e:
657
+ scaling_factor = None
658
+ print(f"Error calculating scaling factor: {e}")
659
+
660
+ if scaling_factor is None or scaling_factor == 0:
661
+ scaling_factor = 0.7
662
+ print("Using default scaling factor of 0.7 due to calculation error")
663
+ gc.collect()
664
+ print("Scaling factor determined: {}".format(scaling_factor))
665
+
666
+ # ---------------------
667
+ # 4) Optional boundary dimension checks (now without size limits)
668
+ # ---------------------
669
+ if add_boundary.lower() == "yes":
670
+ if offset_unit.lower() == "mm":
671
+ if boundary_length < 50:
672
+ boundary_length = boundary_length * 25.4
673
+ if boundary_width < 50:
674
+ boundary_width = boundary_width * 25.4
675
+ boundary_length_in = boundary_length / 25.4
676
+ boundary_width_in = boundary_width / 25.4
677
+ else:
678
+ boundary_length_in = boundary_length
679
+ boundary_width_in = boundary_width
680
+
681
+ # ---------------------
682
+ # 5) Remove background from the shrunked drawer image (main objects)
683
+ # ---------------------
684
+ if offset_unit.lower() == "mm":
685
+ if offset_value < 1:
686
+ offset_value = offset_value * 25.4
687
+ offset_inches = offset_value / 25.4
688
+ else:
689
+ offset_inches = offset_value
690
+
691
+ t = time.time()
692
+ orig_size = shrunked_img.shape[:2]
693
+ objects_mask = remove_bg(shrunked_img)
694
+ processed_size = objects_mask.shape[:2]
695
+
696
+ objects_mask = exclude_scaling_box(objects_mask, scaling_box_coords, orig_size, processed_size, expansion_factor=1.2)
697
+ objects_mask = resize_img(objects_mask, (shrunked_img.shape[1], shrunked_img.shape[0]))
698
+ del scaling_box_coords
699
+ gc.collect()
700
+ print("Object masking completed in {:.2f} seconds".format(time.time() - t))
701
+
702
+ # Dilate mask by offset_pixels
703
+ t = time.time()
704
+ offset_pixels = (offset_inches / scaling_factor) * 2 + 1 if scaling_factor != 0 else 1
705
+ dilated_mask = cv2.dilate(objects_mask, np.ones((int(offset_pixels), int(offset_pixels)), np.uint8))
706
+ del objects_mask
707
+ gc.collect()
708
+ print("Mask dilation completed in {:.2f} seconds".format(time.time() - t))
709
+
710
+ Image.fromarray(dilated_mask).save("./outputs/scaled_mask_new.jpg")
711
+
712
+ # ---------------------
713
+ # 6) Extract outlines from the mask and convert them to DXF splines
714
+ # ---------------------
715
+ t = time.time()
716
+ outlines, contours = extract_outlines(dilated_mask)
717
+ print("Outline extraction completed in {:.2f} seconds".format(time.time() - t))
718
+
719
+ output_img = shrunked_img.copy()
720
+ del shrunked_img
721
+ gc.collect()
722
+
723
+ t = time.time()
724
+ use_finger_clearance = True if finger_clearance.lower() == "yes" else False
725
+ doc, final_polygons_inch = save_dxf_spline(
726
+ contours, scaling_factor, processed_size[0], finger_clearance=use_finger_clearance
727
+ )
728
+ del contours
729
+ gc.collect()
730
+ print("DXF generation completed in {:.2f} seconds".format(time.time() - t))
731
+
732
+ # ---------------------
733
+ # Compute bounding box of inner tool contours BEFORE adding optional boundary
734
+ # ---------------------
735
+ inner_min_x = float("inf")
736
+ inner_min_y = float("inf")
737
+ inner_max_x = -float("inf")
738
+ inner_max_y = -float("inf")
739
+ for poly in final_polygons_inch:
740
+ b = poly.bounds
741
+ inner_min_x = min(inner_min_x, b[0])
742
+ inner_min_y = min(inner_min_y, b[1])
743
+ inner_max_x = max(inner_max_x, b[2])
744
+ inner_max_y = max(inner_max_y, b[3])
745
+
746
+ # ---------------------
747
+ # 7) Add optional rectangular boundary
748
+ # ---------------------
749
+ boundary_polygon = None
750
+ if add_boundary.lower() == "yes":
751
+ boundary_polygon = add_rectangular_boundary(
752
+ doc,
753
+ final_polygons_inch,
754
+ boundary_length,
755
+ boundary_width,
756
+ offset_unit,
757
+ annotation_text,
758
+ image_height_in=output_img.shape[0] * scaling_factor,
759
+ image_width_in=output_img.shape[1] * scaling_factor
760
+ )
761
+ if boundary_polygon is not None:
762
+ final_polygons_inch.append(boundary_polygon)
763
+ # else:
764
+ # raise boundary_issue("Raised when bounds are given but rectangular boundary is no.")
765
+ # ---------------------
766
+ # 8) Add annotation text (if provided) in the DXF
767
+ # ---------------------
768
+ msp = doc.modelspace()
769
+
770
+ if annotation_text.strip():
771
+ if boundary_polygon is not None:
772
+ text_x = ((inner_min_x + inner_max_x) / 2.0) - (int(len(annotation_text.strip()) / 2.0))
773
+ text_height_dxf = 0.75
774
+ text_y_dxf = boundary_polygon.bounds[1] + 0.25
775
+ font = get_font_face("Arial")
776
+ paths = text2path.make_paths_from_str(
777
+ annotation_text.strip().upper(),
778
+ font=font, # Use default font
779
+ size=text_height_dxf,
780
+ align=TextEntityAlignment.LEFT
781
+ )
782
+
783
+ # Create a translation matrix
784
+ translation = ezdxf.math.Matrix44.translate(text_x, text_y_dxf, 0)
785
+ # Apply the translation to each path
786
+ translated_paths = [p.transform(translation) for p in paths]
787
+
788
+ # Render the paths as splines and polylines
789
+ path.render_splines_and_polylines(
790
+ msp,
791
+ translated_paths,
792
+ dxfattribs={"layer": "ANNOTATION", "color": 7}
793
+ )
794
+
795
+ # Save the DXF
796
+ dxf_filepath = os.path.join("./outputs", "out.dxf")
797
+ doc.saveas(dxf_filepath)
798
+
799
+ # ---------------------
800
+ # 9) For the preview images, draw the polygons and place text similarly
801
+ # ---------------------
802
+ draw_polygons_inch(final_polygons_inch, output_img, scaling_factor, processed_size[0], color=(0, 0, 255), thickness=2)
803
+ new_outlines = np.ones_like(output_img) * 255
804
+ draw_polygons_inch(final_polygons_inch, new_outlines, scaling_factor, processed_size[0], color=(0, 0, 255), thickness=2)
805
+
806
+ if annotation_text.strip():
807
+ if boundary_polygon is not None:
808
+ text_height_cv = 0.75
809
+ text_x_img = int(((inner_min_x + inner_max_x) / 2.0) / scaling_factor)
810
+ text_y_in = boundary_polygon.bounds[1] + 0.25
811
+ text_y_img = int(processed_size[0] - (text_y_in / scaling_factor))
812
+ org = (text_x_img - int(len(annotation_text.strip()) * 6), text_y_img)
813
+
814
+ # Method 2: Use two different thicknesses
815
+ # Draw thicker outline
816
+ temp_img = np.zeros_like(output_img)
817
+
818
+ cv2.putText(
819
+ temp_img,
820
+ annotation_text.strip().upper(),
821
+ org,
822
+ cv2.FONT_HERSHEY_SIMPLEX,
823
+ 2,
824
+ (0, 0, 255), # Red color
825
+ 4, # Thicker outline
826
+ cv2.LINE_AA
827
+ )
828
+
829
+ cv2.putText(
830
+ temp_img,
831
+ annotation_text.strip().upper(),
832
+ org,
833
+ cv2.FONT_HERSHEY_SIMPLEX,
834
+ 2,
835
+ (0, 0, 0), # Black to create hole
836
+ 2, # Thinner inner part
837
+ cv2.LINE_AA
838
+ )
839
+
840
+ outline_mask = cv2.cvtColor(temp_img, cv2.COLOR_BGR2GRAY)
841
+ _, outline_mask = cv2.threshold(outline_mask, 1, 255, cv2.THRESH_BINARY)
842
+
843
+ output_img[outline_mask > 0] = temp_img[outline_mask > 0]
844
+
845
+ cv2.putText(
846
+ new_outlines,
847
+ annotation_text.strip().upper(),
848
+ org,
849
+ cv2.FONT_HERSHEY_SIMPLEX,
850
+ 2,
851
+ (0, 0, 255), # Red color
852
+ 4, # Thicker outline
853
+ cv2.LINE_AA
854
+ )
855
+
856
+ cv2.putText(
857
+ new_outlines,
858
+ annotation_text.strip().upper(),
859
+ org,
860
+ cv2.FONT_HERSHEY_SIMPLEX,
861
+ 2,
862
+ (255, 255, 255), # Inner text in white
863
+ 2, # Thinner inner part
864
+ cv2.LINE_AA
865
+ )
866
+
867
+ outlines_color = cv2.cvtColor(new_outlines, cv2.COLOR_BGR2RGB)
868
+ print("Total prediction time: {:.2f} seconds".format(time.time() - overall_start))
869
+
870
+ return (
871
+ cv2.cvtColor(output_img, cv2.COLOR_BGR2RGB),
872
+ outlines_color,
873
+ dxf_filepath,
874
+ dilated_mask,
875
+ str(scaling_factor)
876
+ )
877
+
878
+ # ---------------------
879
+ # Gradio Interface
880
+ # ---------------------
881
if __name__ == "__main__":
    # The prediction pipeline saves its mask preview and DXF under ./outputs,
    # so the folder has to exist before the first request is served.
    os.makedirs("./outputs", exist_ok=True)

    def gradio_predict(img, offset, offset_unit, finger_clearance, add_boundary, boundary_length, boundary_width, annotation_text):
        """Call predict() and convert any failure into a UI-friendly result.

        When predict() raises, the four image/file outputs are returned as
        None and the error text goes into the last (textbox) output.
        """
        try:
            result = predict(
                img,
                offset,
                offset_unit,
                finger_clearance,
                add_boundary,
                boundary_length,
                boundary_width,
                annotation_text,
            )
        except Exception as e:
            result = (None, None, None, None, f"Error: {str(e)}")
        return result

    # Input widgets, in the same order as gradio_predict's parameters.
    input_widgets = [
        gr.Image(label="Input Image"),
        gr.Number(label="Offset value for Mask", value=0.075),
        gr.Dropdown(label="Offset Unit", choices=["mm", "inches"], value="inches"),
        gr.Dropdown(label="Add Finger Clearance?", choices=["Yes", "No"], value="No"),
        gr.Dropdown(label="Add Rectangular Boundary?", choices=["Yes", "No"], value="No"),
        gr.Number(label="Boundary Length", value=300.0, precision=2),
        gr.Number(label="Boundary Width", value=200.0, precision=2),
        gr.Textbox(label="Annotation (max 20 chars)", max_length=20, placeholder="Type up to 20 characters"),
    ]

    # Output widgets, in the same order as the tuple returned by predict().
    output_widgets = [
        gr.Image(label="Output Image"),
        gr.Image(label="Outlines of Objects"),
        gr.File(label="DXF file"),
        gr.Image(label="Mask"),
        gr.Textbox(label="Scaling Factor (inches/pixel)"),
    ]

    example_rows = [
        ["./Test20.jpg", 0.075, "inches", "No", "No", 300.0, 200.0, "MyTool"],
        ["./Test21.jpg", 0.075, "inches", "Yes", "Yes", 300.0, 200.0, "Tool2"],
    ]

    iface = gr.Interface(
        fn=gradio_predict,
        inputs=input_widgets,
        outputs=output_widgets,
        examples=example_rows,
    )
    iface.launch(share=True)
coin_det.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf6007ec3d4cd303af4cba2e202f68600a904eb23dfc736b4aa29a215201036b
3
+ size 5490003
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ transformers
2
+ ultralytics==8.3.9
3
+ ezdxf
4
+ gradio
5
+ pydantic==2.10.6
6
+ kornia
7
+ timm
8
+ einops
9
+ shapely
10
+ gevent==22.10.2
scalingtestupdated.py ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import numpy as np
3
+ import os
4
+ import argparse
5
+ from typing import Union
6
+ from matplotlib import pyplot as plt
7
+
8
+
9
class ScalingSquareDetector:
    """Derive a physical-size-per-pixel factor from a mask of a reference object."""

    def __init__(self, feature_detector="ORB", debug=False):
        """
        Initialize the detector with the desired feature matching algorithm.
        :param feature_detector: "ORB" or "SIFT" (default is "ORB").
        :param debug: If True, draw_debug_images() moves the saved debug image
            into the requested folder.
        :raises ValueError: If feature_detector is neither "ORB" nor "SIFT".
        """
        self.feature_detector = feature_detector
        self.debug = debug
        self.detector = self._initialize_detector()

    def _initialize_detector(self):
        """
        Initialize the chosen feature detector.
        :return: OpenCV detector object.
        :raises ValueError: If the configured name is not "ORB" or "SIFT"
            (the comparison is case-insensitive).
        """
        name = self.feature_detector.upper()
        if name == "SIFT":
            return cv2.SIFT_create()
        if name == "ORB":
            return cv2.ORB_create()
        raise ValueError("Invalid feature detector. Choose 'ORB' or 'SIFT'.")

    def find_scaling_square(
        self, target_image, known_size_mm, roi_margin=30
    ):
        """
        Measure the reference object in a mask and return the scaling factor.

        The largest external contour (by area) is taken as the reference
        object; its bounding-box width and height are averaged to get the
        object's size in pixels.

        :param target_image: Single-channel 8-bit image (it is passed to
            cv2.findContours and converted with COLOR_GRAY2BGR).
        :param known_size_mm: Physical size of the reference object. The
            result is expressed in this unit per pixel.
            NOTE(review): app.py passes 0.955 and labels the result
            "inches/pixel" - confirm the intended unit.
        :param roi_margin: Currently unused; kept for interface compatibility.
        :return: Scaling factor (known_size_mm divided by the size in pixels).
        :raises ValueError: If no contour is found in target_image.
        """
        contours, _ = cv2.findContours(
            target_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE
        )

        if not contours:
            raise ValueError("No contours found in the cropped ROI.")

        # Pick the contour with the largest area so stray specks in the mask
        # cannot be mistaken for the reference object.
        largest_square = max(contours, key=cv2.contourArea)

        # Draw the chosen contour on a colour copy of the mask. drawContours
        # expects a list of contours, hence the wrapping list.
        target_image_color = cv2.cvtColor(target_image, cv2.COLOR_GRAY2BGR)
        cv2.drawContours(
            target_image_color, [largest_square], -1, (255, 0, 0), 3
        )

        # Written unconditionally (the debug guard was disabled on purpose in
        # the original code); draw_debug_images() relocates it when debugging.
        cv2.imwrite("largest_contour.jpg", target_image_color)

        # Size of the reference object in pixels: mean of bounding-box sides.
        _, _, square_width_px, square_height_px = cv2.boundingRect(largest_square)
        avg_square_size_px = (square_width_px + square_height_px) / 2

        return known_size_mm / avg_square_size_px

    def draw_debug_images(self, output_folder):
        """
        Save debug images if enabled.
        :param output_folder: Directory to move the debug images into; it is
            created when missing. Does nothing when debug is False.
        """
        if not self.debug:
            return

        os.makedirs(output_folder, exist_ok=True)
        for img_name in ("largest_contour.jpg",):
            if os.path.exists(img_name):
                # os.replace overwrites a previous debug image on every
                # platform, whereas os.rename fails on Windows if it exists.
                os.replace(img_name, os.path.join(output_folder, img_name))
102
+
103
+
104
def calculate_scaling_factor(
    target_image,
    reference_obj_size_mm=0.955,
    feature_detector="ORB",
    debug=False,
    roi_margin=30,
):
    """
    Compute the scaling factor for a reference-object mask.

    Builds a ScalingSquareDetector, measures the reference object in
    target_image and, when debug is set, moves the debug images into
    the "debug_outputs" folder.

    :param target_image: Mask image handed to find_scaling_square().
    :param reference_obj_size_mm: Known physical size of the reference object.
    :param feature_detector: "ORB" or "SIFT", forwarded to the detector.
    :param debug: If True, debug images are relocated after measuring.
    :param roi_margin: Forwarded unchanged to find_scaling_square().
    :return: The scaling factor reported by find_scaling_square().
    """
    square_finder = ScalingSquareDetector(feature_detector=feature_detector, debug=debug)
    factor = square_finder.find_scaling_square(
        target_image=target_image,
        known_size_mm=reference_obj_size_mm,
        roi_margin=roi_margin,
    )

    if not debug:
        return factor

    square_finder.draw_debug_images("debug_outputs")
    return factor
126
+
127
+
128
+ # Example usage:
129
if __name__ == "__main__":
    # Manual smoke test: crop the detected box from every sample image and
    # print the scaling factor computed for it.
    from traceback import print_exc

    from PIL import Image
    from ultralytics import YOLO
    from app import yolo_detect
    from ultralytics.utils.plotting import save_one_box

    # cv2.imwrite does not create missing directories, so make sure it exists.
    os.makedirs("./outputs", exist_ok=True)

    # Load the model once; it does not change between images.
    model = YOLO("./last.pt")

    for idx, file in enumerate(os.listdir("./sample_images")):
        img = np.array(Image.open(os.path.join("./sample_images", file)))
        img = yolo_detect(img, ['box'])
        res = model.predict(img, conf=0.6)

        box_img = save_one_box(res[0].cpu().boxes.xyxy, im=res[0].orig_img, save=False)
        cv2.imwrite(f"./outputs/{idx}_{file}", box_img)

        print("File: ",f"./outputs/{idx}_{file}")
        try:
            # NOTE(review): find_scaling_square() feeds target_image to
            # cv2.findContours / COLOR_GRAY2BGR, which need a single-channel
            # image; box_img is presumably a colour crop - confirm whether it
            # must be converted to a binary mask first.
            scaling_factor = calculate_scaling_factor(
                target_image=box_img,
                reference_obj_size_mm=0.955,
                feature_detector="ORB",
                debug=False,
                roi_margin=90,
            )
            print(f"Scaling Factor (mm per pixel): {scaling_factor:.6f}")
        except Exception as e:
            # print_exc() writes the traceback itself and returns None.
            print_exc()
            print(f"Error: {e}")
u2net.py ADDED
@@ -0,0 +1,525 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+
5
class REBNCONV(nn.Module):
    """3x3 convolution followed by batch normalisation and an in-place ReLU."""

    def __init__(self,in_ch=3,out_ch=3,dirate=1):
        """
        :param in_ch: Number of input channels.
        :param out_ch: Number of output channels.
        :param dirate: Dilation rate; the padding is set to the same value so
            the 3x3 convolution keeps the spatial size.
        """
        super().__init__()

        self.conv_s1 = nn.Conv2d(
            in_ch,
            out_ch,
            kernel_size=3,
            padding=1 * dirate,
            dilation=1 * dirate,
        )
        self.bn_s1 = nn.BatchNorm2d(out_ch)
        self.relu_s1 = nn.ReLU(inplace=True)

    def forward(self,x):
        """Apply conv -> batch norm -> ReLU to x and return the result."""
        convolved = self.conv_s1(x)
        normalised = self.bn_s1(convolved)
        return self.relu_s1(normalised)
19
+
20
+ ## upsample tensor 'src' to have the same spatial size with tensor 'tar'
21
+ def _upsample_like(src,tar):
22
+
23
+ src = F.upsample(src,size=tar.shape[2:],mode='bilinear')
24
+
25
+ return src
26
+
27
+
28
+ ### RSU-7 ###
29
class RSU7(nn.Module):#UNet07DRES(nn.Module):
    """Residual block: a small encoder-decoder with five pooling steps.

    The input convolution's output is added to the decoder output, so the
    result has out_ch channels and the spatial size of the input.
    """

    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
        """
        :param in_ch: Channels of the input tensor.
        :param mid_ch: Channels used inside the encoder/decoder.
        :param out_ch: Channels of the returned tensor.
        """
        super().__init__()

        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)

        # encoder: conv, then 2x max-pool (ceil_mode keeps odd sizes valid)
        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
        self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool5 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.rebnconv6 = REBNCONV(mid_ch, mid_ch, dirate=1)

        # deepest layer uses dilation instead of another pooling step
        self.rebnconv7 = REBNCONV(mid_ch, mid_ch, dirate=2)

        # decoder: every layer consumes [upsampled features, skip features]
        self.rebnconv6d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv5d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)

    def forward(self,x):
        """Run the encoder-decoder on x and add the input-conv residual."""
        residual = self.rebnconvin(x)

        enc1 = self.rebnconv1(residual)
        enc2 = self.rebnconv2(self.pool1(enc1))
        enc3 = self.rebnconv3(self.pool2(enc2))
        enc4 = self.rebnconv4(self.pool3(enc3))
        enc5 = self.rebnconv5(self.pool4(enc4))
        enc6 = self.rebnconv6(self.pool5(enc5))

        bridge = self.rebnconv7(enc6)

        dec = self.rebnconv6d(torch.cat((bridge, enc6), 1))
        dec = self.rebnconv5d(torch.cat((_upsample_like(dec, enc5), enc5), 1))
        dec = self.rebnconv4d(torch.cat((_upsample_like(dec, enc4), enc4), 1))
        dec = self.rebnconv3d(torch.cat((_upsample_like(dec, enc3), enc3), 1))
        dec = self.rebnconv2d(torch.cat((_upsample_like(dec, enc2), enc2), 1))
        dec = self.rebnconv1d(torch.cat((_upsample_like(dec, enc1), enc1), 1))

        return dec + residual
104
+
105
+ ### RSU-6 ###
106
class RSU6(nn.Module):#UNet06DRES(nn.Module):
    """Residual block: a small encoder-decoder with four pooling steps.

    The input convolution's output is added to the decoder output, so the
    result has out_ch channels and the spatial size of the input.
    """

    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
        """
        :param in_ch: Channels of the input tensor.
        :param mid_ch: Channels used inside the encoder/decoder.
        :param out_ch: Channels of the returned tensor.
        """
        super().__init__()

        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)

        # encoder: conv, then 2x max-pool
        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
        self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=1)

        # deepest layer uses dilation instead of another pooling step
        self.rebnconv6 = REBNCONV(mid_ch, mid_ch, dirate=2)

        # decoder: every layer consumes [upsampled features, skip features]
        self.rebnconv5d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)

    def forward(self,x):
        """Run the encoder-decoder on x and add the input-conv residual."""
        residual = self.rebnconvin(x)

        enc1 = self.rebnconv1(residual)
        enc2 = self.rebnconv2(self.pool1(enc1))
        enc3 = self.rebnconv3(self.pool2(enc2))
        enc4 = self.rebnconv4(self.pool3(enc3))
        enc5 = self.rebnconv5(self.pool4(enc4))

        bridge = self.rebnconv6(enc5)

        dec = self.rebnconv5d(torch.cat((bridge, enc5), 1))
        dec = self.rebnconv4d(torch.cat((_upsample_like(dec, enc4), enc4), 1))
        dec = self.rebnconv3d(torch.cat((_upsample_like(dec, enc3), enc3), 1))
        dec = self.rebnconv2d(torch.cat((_upsample_like(dec, enc2), enc2), 1))
        dec = self.rebnconv1d(torch.cat((_upsample_like(dec, enc1), enc1), 1))

        return dec + residual
173
+
174
+ ### RSU-5 ###
175
class RSU5(nn.Module):#UNet05DRES(nn.Module):
    """Residual block: a small encoder-decoder with three pooling steps.

    The input convolution's output is added to the decoder output, so the
    result has out_ch channels and the spatial size of the input.
    """

    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
        """
        :param in_ch: Channels of the input tensor.
        :param mid_ch: Channels used inside the encoder/decoder.
        :param out_ch: Channels of the returned tensor.
        """
        super().__init__()

        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)

        # encoder: conv, then 2x max-pool
        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
        self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)

        # deepest layer uses dilation instead of another pooling step
        self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=2)

        # decoder: every layer consumes [upsampled features, skip features]
        self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)

    def forward(self,x):
        """Run the encoder-decoder on x and add the input-conv residual."""
        residual = self.rebnconvin(x)

        enc1 = self.rebnconv1(residual)
        enc2 = self.rebnconv2(self.pool1(enc1))
        enc3 = self.rebnconv3(self.pool2(enc2))
        enc4 = self.rebnconv4(self.pool3(enc3))

        bridge = self.rebnconv5(enc4)

        dec = self.rebnconv4d(torch.cat((bridge, enc4), 1))
        dec = self.rebnconv3d(torch.cat((_upsample_like(dec, enc3), enc3), 1))
        dec = self.rebnconv2d(torch.cat((_upsample_like(dec, enc2), enc2), 1))
        dec = self.rebnconv1d(torch.cat((_upsample_like(dec, enc1), enc1), 1))

        return dec + residual
231
+
232
+ ### RSU-4 ###
233
class RSU4(nn.Module):#UNet04DRES(nn.Module):
    """Residual block: a small encoder-decoder with two pooling steps.

    The input convolution's output is added to the decoder output, so the
    result has out_ch channels and the spatial size of the input.
    """

    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
        """
        :param in_ch: Channels of the input tensor.
        :param mid_ch: Channels used inside the encoder/decoder.
        :param out_ch: Channels of the returned tensor.
        """
        super().__init__()

        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)

        # encoder: conv, then 2x max-pool
        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
        self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
        self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)

        # deepest layer uses dilation instead of another pooling step
        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=2)

        # decoder: every layer consumes [upsampled features, skip features]
        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)

    def forward(self,x):
        """Run the encoder-decoder on x and add the input-conv residual."""
        residual = self.rebnconvin(x)

        enc1 = self.rebnconv1(residual)
        enc2 = self.rebnconv2(self.pool1(enc1))
        enc3 = self.rebnconv3(self.pool2(enc2))

        bridge = self.rebnconv4(enc3)

        dec = self.rebnconv3d(torch.cat((bridge, enc3), 1))
        dec = self.rebnconv2d(torch.cat((_upsample_like(dec, enc2), enc2), 1))
        dec = self.rebnconv1d(torch.cat((_upsample_like(dec, enc1), enc1), 1))

        return dec + residual
279
+
280
+ ### RSU-4F ###
281
class RSU4F(nn.Module):#UNet04FRES(nn.Module):
    """Residual block without pooling; depth comes from growing dilation.

    The layers use dilation rates 1, 2, 4 and 8 at full resolution, so no
    upsampling is needed in the decoder. The input convolution's output is
    added to the decoder output.
    """

    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
        """
        :param in_ch: Channels of the input tensor.
        :param mid_ch: Channels used inside the encoder/decoder.
        :param out_ch: Channels of the returned tensor.
        """
        super().__init__()

        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)

        # encoder with doubling dilation rates
        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=2)
        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=4)

        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=8)

        # decoder mirrors the encoder's dilation rates
        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=4)
        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=2)
        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)

    def forward(self,x):
        """Run the dilated encoder-decoder on x and add the input-conv residual."""
        residual = self.rebnconvin(x)

        enc1 = self.rebnconv1(residual)
        enc2 = self.rebnconv2(enc1)
        enc3 = self.rebnconv3(enc2)

        bridge = self.rebnconv4(enc3)

        dec = self.rebnconv3d(torch.cat((bridge, enc3), 1))
        dec = self.rebnconv2d(torch.cat((dec, enc2), 1))
        dec = self.rebnconv1d(torch.cat((dec, enc1), 1))

        return dec + residual
315
+
316
+
317
+ ##### U^2-Net ####
318
class U2NET(nn.Module):
    """Six-stage encoder / five-stage decoder built from RSU blocks.

    forward() returns seven sigmoid-activated maps: the fused output first,
    followed by the six side outputs resized to the first side output's size.
    """

    def __init__(self,in_ch=3,out_ch=1):
        """
        :param in_ch: Channels of the input tensor.
        :param out_ch: Channels of every returned map.
        """
        super().__init__()

        # encoder
        self.stage1 = RSU7(in_ch, 32, 64)
        self.pool12 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.stage2 = RSU6(64, 32, 128)
        self.pool23 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.stage3 = RSU5(128, 64, 256)
        self.pool34 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.stage4 = RSU4(256, 128, 512)
        self.pool45 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.stage5 = RSU4F(512, 256, 512)
        self.pool56 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.stage6 = RSU4F(512, 256, 512)

        # decoder: inputs are [upsampled deeper features, encoder skip]
        self.stage5d = RSU4F(1024, 256, 512)
        self.stage4d = RSU4(1024, 128, 256)
        self.stage3d = RSU5(512, 64, 128)
        self.stage2d = RSU6(256, 32, 64)
        self.stage1d = RSU7(128, 16, 64)

        # one 3x3 prediction head per decoder stage (plus stage 6)
        self.side1 = nn.Conv2d(64, out_ch, 3, padding=1)
        self.side2 = nn.Conv2d(64, out_ch, 3, padding=1)
        self.side3 = nn.Conv2d(128, out_ch, 3, padding=1)
        self.side4 = nn.Conv2d(256, out_ch, 3, padding=1)
        self.side5 = nn.Conv2d(512, out_ch, 3, padding=1)
        self.side6 = nn.Conv2d(512, out_ch, 3, padding=1)

        # 1x1 conv that fuses the six concatenated side outputs
        self.outconv = nn.Conv2d(6 * out_ch, out_ch, 1)

    def forward(self,x):
        """Return (fused, side1, ..., side6), each passed through a sigmoid."""
        # encoder
        feat1 = self.stage1(x)
        feat2 = self.stage2(self.pool12(feat1))
        feat3 = self.stage3(self.pool23(feat2))
        feat4 = self.stage4(self.pool34(feat3))
        feat5 = self.stage5(self.pool45(feat4))
        feat6 = self.stage6(self.pool56(feat5))

        # decoder
        dec5 = self.stage5d(torch.cat((_upsample_like(feat6, feat5), feat5), 1))
        dec4 = self.stage4d(torch.cat((_upsample_like(dec5, feat4), feat4), 1))
        dec3 = self.stage3d(torch.cat((_upsample_like(dec4, feat3), feat3), 1))
        dec2 = self.stage2d(torch.cat((_upsample_like(dec3, feat2), feat2), 1))
        dec1 = self.stage1d(torch.cat((_upsample_like(dec2, feat1), feat1), 1))

        # side outputs, all brought to the resolution of the first one
        first_side = self.side1(dec1)
        side_maps = [first_side]
        deeper_heads = (
            (self.side2, dec2),
            (self.side3, dec3),
            (self.side4, dec4),
            (self.side5, dec5),
            (self.side6, feat6),
        )
        for head, features in deeper_heads:
            side_maps.append(_upsample_like(head(features), first_side))

        fused = self.outconv(torch.cat(side_maps, 1))

        return tuple(F.sigmoid(m) for m in [fused, *side_maps])
421
+
422
+ ### U^2-Net small ###
423
class U2NETP(nn.Module):
    """Smaller variant: every stage uses 16 mid channels and 64 output channels.

    forward() returns seven sigmoid-activated maps: the fused output first,
    followed by the six side outputs resized to the first side output's size.
    """

    def __init__(self,in_ch=3,out_ch=1):
        """
        :param in_ch: Channels of the input tensor.
        :param out_ch: Channels of every returned map.
        """
        super().__init__()

        # encoder
        self.stage1 = RSU7(in_ch, 16, 64)
        self.pool12 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.stage2 = RSU6(64, 16, 64)
        self.pool23 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.stage3 = RSU5(64, 16, 64)
        self.pool34 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.stage4 = RSU4(64, 16, 64)
        self.pool45 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.stage5 = RSU4F(64, 16, 64)
        self.pool56 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.stage6 = RSU4F(64, 16, 64)

        # decoder: inputs are [upsampled deeper features, encoder skip]
        self.stage5d = RSU4F(128, 16, 64)
        self.stage4d = RSU4(128, 16, 64)
        self.stage3d = RSU5(128, 16, 64)
        self.stage2d = RSU6(128, 16, 64)
        self.stage1d = RSU7(128, 16, 64)

        # one 3x3 prediction head per decoder stage (plus stage 6)
        self.side1 = nn.Conv2d(64, out_ch, 3, padding=1)
        self.side2 = nn.Conv2d(64, out_ch, 3, padding=1)
        self.side3 = nn.Conv2d(64, out_ch, 3, padding=1)
        self.side4 = nn.Conv2d(64, out_ch, 3, padding=1)
        self.side5 = nn.Conv2d(64, out_ch, 3, padding=1)
        self.side6 = nn.Conv2d(64, out_ch, 3, padding=1)

        # 1x1 conv that fuses the six concatenated side outputs
        self.outconv = nn.Conv2d(6 * out_ch, out_ch, 1)

    def forward(self,x):
        """Return (fused, side1, ..., side6), each passed through a sigmoid."""
        # encoder
        feat1 = self.stage1(x)
        feat2 = self.stage2(self.pool12(feat1))
        feat3 = self.stage3(self.pool23(feat2))
        feat4 = self.stage4(self.pool34(feat3))
        feat5 = self.stage5(self.pool45(feat4))
        feat6 = self.stage6(self.pool56(feat5))

        # decoder
        dec5 = self.stage5d(torch.cat((_upsample_like(feat6, feat5), feat5), 1))
        dec4 = self.stage4d(torch.cat((_upsample_like(dec5, feat4), feat4), 1))
        dec3 = self.stage3d(torch.cat((_upsample_like(dec4, feat3), feat3), 1))
        dec2 = self.stage2d(torch.cat((_upsample_like(dec3, feat2), feat2), 1))
        dec1 = self.stage1d(torch.cat((_upsample_like(dec2, feat1), feat1), 1))

        # side outputs, all brought to the resolution of the first one
        first_side = self.side1(dec1)
        side_maps = [first_side]
        deeper_heads = (
            (self.side2, dec2),
            (self.side3, dec3),
            (self.side4, dec4),
            (self.side5, dec5),
            (self.side6, feat6),
        )
        for head, features in deeper_heads:
            side_maps.append(_upsample_like(head(features), first_side))

        fused = self.outconv(torch.cat(side_maps, 1))

        return tuple(F.sigmoid(m) for m in [fused, *side_maps])
u2netp.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7567cde013fb64813973ce6e1ecc25a80c05c3ca7adbc5a54f3c3d90991b854
3
+ size 4683258
yolo11n.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ebbc80d4a7680d14987a577cd21342b65ecfd94632bd9a8da63ae6417644ee1
3
+ size 5613764
yolov8x-worldv2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41e771bfbbb8894dd857f3fef7cac3b3578dffd49fd3547101efa6a606a02a0e
3
+ size 146355704