VietCat committed on
Commit
9c6bc51
·
1 Parent(s): 7963d0f

add more logic to process image

Browse files
Files changed (2) hide show
  1. config.yaml +1 -1
  2. model.py +80 -6
config.yaml CHANGED
@@ -1,6 +1,6 @@
1
  model:
2
  path: 'VietCat/GTSRB-Model/models/GTSRB.pt' # Path to the YOLO model on Hugging Face Hub (will be downloaded automatically)
3
- confidence_threshold: 0.25 # Minimum confidence for detections
4
 
5
  inference:
6
  box_color: (128, 0, 128) # Purple color for bounding boxes (BGR format)
 
1
  model:
2
  path: 'VietCat/GTSRB-Model/models/GTSRB.pt' # Path to the YOLO model on Hugging Face Hub (will be downloaded automatically)
3
+ confidence_threshold: 0.15 # Minimum confidence for detections
4
 
5
  inference:
6
  box_color: (128, 0, 128) # Purple color for bounding boxes (BGR format)
model.py CHANGED
@@ -39,6 +39,50 @@ class TrafficSignDetector:
39
  self.thickness = config['inference']['thickness']
40
  self.classes = config['classes']
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  def detect(self, image):
43
  """
44
  Perform inference on the image and draw bounding boxes.
@@ -46,24 +90,54 @@ class TrafficSignDetector:
46
  :return: image with drawn bounding boxes
47
  """
48
  print(f"Input image shape: {image.shape}")
49
- results = self.model(image, conf=self.conf_threshold)
 
 
 
 
 
 
 
 
 
 
 
 
50
  print(f"Number of results: {len(results)}")
51
 
 
 
 
52
  for result in results:
53
  boxes = result.boxes
54
  print(f"Number of boxes in this result: {len(boxes)}")
 
 
 
 
 
 
 
 
55
  for box in boxes:
56
- # Get bounding box coordinates
57
  x1, y1, x2, y2 = box.xyxy[0].cpu().numpy().astype(int)
 
 
 
 
 
 
 
58
  conf = box.conf[0].cpu().numpy()
59
  cls = int(box.cls[0].cpu().numpy())
60
  print(f"Detected: {self.classes[cls]} with conf {conf:.2f} at ({x1},{y1})-({x2},{y2})")
61
 
62
- # Draw bounding box
63
- cv2.rectangle(image, (x1, y1), (x2, y2), self.box_color, self.thickness)
64
 
65
  # Draw label
66
  label = f"{self.classes[cls]}: {conf:.2f}"
67
- cv2.putText(image, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, self.text_color, 2)
68
 
69
- return image
 
39
  self.thickness = config['inference']['thickness']
40
  self.classes = config['classes']
41
 
42
def _ensure_square(self, image, target_size=640):
    """
    Letterbox an image onto a square canvas while keeping its aspect ratio.

    The longer side is scaled (up or down) to ``target_size``; the shorter
    side is scaled by the same factor and centred on a gray (114) canvas.

    :param image: input image (numpy array; HxW, HxWx1, HxWx3 or HxWx4).
        4-channel input is assumed to be BGRA (OpenCV order) -- confirm with caller.
    :param target_size: side length of the square output (default 640)
    :return: tuple ``(canvas, scale, pad_x, pad_y)`` where ``canvas`` is a
        ``target_size x target_size x 3`` uint8 image, ``scale`` is the resize
        factor applied to both axes, and ``pad_x`` / ``pad_y`` are the left/top
        padding offsets in pixels
    :raises ValueError: if the image has a zero-sized side or ``target_size``
        is not positive
    """
    original_shape = image.shape
    height, width = original_shape[:2]
    if height == 0 or width == 0:
        raise ValueError(f"Cannot letterbox an empty image of shape {original_shape}")
    if target_size <= 0:
        raise ValueError(f"target_size must be positive, got {target_size}")

    # The canvas is always 3-channel, so bring other layouts to BGR first;
    # otherwise the paste below fails with a broadcasting error.
    if image.ndim == 2 or image.shape[2] == 1:
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
    elif image.shape[2] == 4:
        image = cv2.cvtColor(image, cv2.COLOR_BGRA2BGR)

    # Scale to fit target while maintaining aspect ratio
    scale = target_size / max(width, height)

    # Round instead of truncating (float error such as 479.999 would lose a
    # pixel) and never let a side collapse to 0, which cv2.resize rejects.
    new_width = min(target_size, max(1, round(width * scale)))
    new_height = min(target_size, max(1, round(height * scale)))

    resized = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_LINEAR)

    # Create canvas and place the resized image in its centre
    canvas = np.full((target_size, target_size, 3), (114, 114, 114), dtype=np.uint8)
    pad_x = (target_size - new_width) // 2
    pad_y = (target_size - new_height) // 2
    canvas[pad_y:pad_y + new_height, pad_x:pad_x + new_width] = resized

    print(f"Original: {original_shape} → Scale: {scale:.3f} → Resized: {resized.shape} → Final: {canvas.shape}")

    return canvas, scale, pad_x, pad_y
74
+
75
def _preprocess(self, image):
    """
    Preprocess image: normalize pixel values to [0, 1] range.

    NOTE(review): the model call in this file looks like an Ultralytics YOLO
    predictor, which already divides numpy inputs by 255 -- feeding it the
    output of this method would normalize twice. Confirm before using the
    result for inference.

    :param image: input image (numpy array or array-like, values in [0, 255])
    :return: normalized image (float32, same shape as the input)
    """
    # Normalize pixel values from [0, 255] to [0, 1]
    image = np.asarray(image).astype(np.float32) / 255.0

    # min()/max() raise on zero-size arrays, so only report stats when there
    # is at least one pixel.
    if image.size:
        print(f"Image normalized - Min: {image.min():.3f}, Max: {image.max():.3f}, Mean: {image.mean():.3f}")
    return image
85
+
86
  def detect(self, image):
87
  """
88
  Perform inference on the image and draw bounding boxes.
 
90
  :return: image with drawn bounding boxes
91
  """
92
  print(f"Input image shape: {image.shape}")
93
+
94
+ # Store original image for drawing (uint8)
95
+ original_image = image.copy()
96
+
97
+ # Apply letterbox preprocessing to ensure 640x640 matching training size
98
+ # Returns both processed image and transformation info
99
+ image, scale, pad_x, pad_y = self._ensure_square(image, target_size=640)
100
+
101
+ # Normalize pixel values for inference
102
+ image = self._preprocess(image)
103
+
104
+ # Use imgsz=640 to match training size
105
+ results = self.model(image, conf=self.conf_threshold, imgsz=640)
106
  print(f"Number of results: {len(results)}")
107
 
108
+ # Get original dimensions for coordinate transformation
109
+ orig_h, orig_w = original_image.shape[:2]
110
+
111
  for result in results:
112
  boxes = result.boxes
113
  print(f"Number of boxes in this result: {len(boxes)}")
114
+
115
+ # Debug: print all detection confidences
116
+ if len(boxes) > 0:
117
+ confidences = [float(box.conf[0]) for box in boxes]
118
+ print(f"Detected confidences: {confidences}")
119
+ else:
120
+ print(f"No detections above threshold {self.conf_threshold}")
121
+
122
  for box in boxes:
123
+ # Get bounding box coordinates from letterboxed image
124
  x1, y1, x2, y2 = box.xyxy[0].cpu().numpy().astype(int)
125
+
126
+ # Convert coordinates back to original image space
127
+ x1 = max(0, int((x1 - pad_x) / scale))
128
+ y1 = max(0, int((y1 - pad_y) / scale))
129
+ x2 = min(orig_w, int((x2 - pad_x) / scale))
130
+ y2 = min(orig_h, int((y2 - pad_y) / scale))
131
+
132
  conf = box.conf[0].cpu().numpy()
133
  cls = int(box.cls[0].cpu().numpy())
134
  print(f"Detected: {self.classes[cls]} with conf {conf:.2f} at ({x1},{y1})-({x2},{y2})")
135
 
136
+ # Draw bounding box on original image
137
+ cv2.rectangle(original_image, (x1, y1), (x2, y2), self.box_color, self.thickness)
138
 
139
  # Draw label
140
  label = f"{self.classes[cls]}: {conf:.2f}"
141
+ cv2.putText(original_image, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, self.text_color, 2)
142
 
143
+ return original_image