Ichiro1007 committed on
Commit
1c90073
·
verified ·
1 Parent(s): 49e996b

scorevision: push artifact

Browse files
Files changed (1) hide show
  1. miner.py +297 -0
miner.py ADDED
@@ -0,0 +1,297 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Production Miner for YOLOv9s 4-Class Beverage Detection
3
+ TurboVision Subnet 44 - Bittensor
4
+
5
+ This miner implements the required interface for TurboVision validators.
6
+ Model: YOLOv9s trained for 100 epochs on 4,840 images
7
+ Classes: bottle, wine_glass, cup, can
8
+ Performance: 89.59% mAP50, 100% can detection
9
+ """
10
+
11
+ from pathlib import Path
12
+ from typing import Optional
13
+ import cv2
14
+ import numpy as np
15
+ import onnxruntime as ort
16
+ from pydantic import BaseModel
17
+
18
+
19
class BoundingBox(BaseModel):
    """One detection: a box in xyxy form plus its class index and score."""

    # Box corners; (x1, y1) and (x2, y2) are the two opposite corners.
    x1: int
    y1: int
    x2: int
    y2: int

    # Index of the predicted class.
    cls_id: int

    # Confidence score of the predicted class.
    conf: float
27
+
28
+
29
class TVFrameResult(BaseModel):
    """Detections produced for one frame."""

    # Identifier of the frame these detections belong to.
    frame_id: int

    # Every box detected in the frame (may be empty).
    boxes: list[BoundingBox]

    # Keypoint coordinates; left empty for detection tasks.
    keypoints: list[tuple[int, int]]
34
+
35
+
36
class Miner:
    """
    YOLOv9s 4-Class Beverage Detection Miner

    Wraps an ONNX Runtime session around ``weights.onnx`` and turns a batch
    of BGR frames into ``TVFrameResult`` objects. The author reports
    89.59% mAP50 validation accuracy for the bundled weights.
    """

    def __init__(self, path_hf_repo: Path) -> None:
        """
        Initialize the miner with the model from a Hugging Face repo.

        Args:
            path_hf_repo: Directory of the Hugging Face repository that
                contains ``weights.onnx``. A plain string is accepted too.
        """
        # Coerce to Path so that a str argument does not break the "/" joins
        # below and in __repr__.
        self.path_hf_repo = Path(path_hf_repo)
        self.class_names = ['bottle', 'wine_glass', 'cup', 'can']
        self.num_classes = len(self.class_names)

        # Side length of the square model input, in pixels.
        self.input_size = 640

        # Initialize ONNX session with optimizations
        sess_options = ort.SessionOptions()
        sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
        sess_options.intra_op_num_threads = 4
        sess_options.inter_op_num_threads = 4

        # Load model; CUDA is listed first, CPU is the fallback provider.
        model_path = self.path_hf_repo / "weights.onnx"
        self.session = ort.InferenceSession(
            str(model_path),
            sess_options=sess_options,
            providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
        )

        self.input_name = self.session.get_inputs()[0].name
        self.output_names = [output.name for output in self.session.get_outputs()]

        # Detection thresholds
        self.conf_threshold = 0.25  # Minimum class score to keep a candidate
        self.iou_threshold = 0.45  # NMS IoU threshold

        print(f"✓ YOLOv9s model loaded from {model_path}")
        print(f"✓ Input: {self.input_name}, Outputs: {self.output_names}")
        print(f"✓ Classes: {self.class_names}")

    def __repr__(self) -> str:
        return (
            f"YOLOv9s 4-Class Beverage Miner\n"
            f"Model: {self.path_hf_repo / 'weights.onnx'}\n"
            f"Classes: {self.class_names}\n"
            f"Performance: 89.59% mAP50\n"
        )

    def preprocess(self, image: np.ndarray) -> np.ndarray:
        """
        Preprocess an image for the YOLO model.

        The frame is stretched to the square input size (aspect ratio is not
        preserved); ``postprocess`` undoes this with independent x/y scales.

        Args:
            image: BGR image (H, W, 3)

        Returns:
            Preprocessed float32 tensor (1, 3, input_size, input_size)
        """
        # Resize to input_size x input_size
        img_resized = cv2.resize(image, (self.input_size, self.input_size))

        # Convert BGR to RGB
        img_rgb = cv2.cvtColor(img_resized, cv2.COLOR_BGR2RGB)

        # Normalize to [0, 1]
        img_normalized = img_rgb.astype(np.float32) / 255.0

        # HWC -> CHW, then add the batch dimension
        img_transposed = np.transpose(img_normalized, (2, 0, 1))
        return np.expand_dims(img_transposed, axis=0)

    def _filter_predictions(
        self,
        outputs: list[np.ndarray],
    ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
        """Split the raw output into xywh boxes, class ids and scores above the threshold."""
        predictions = np.asarray(outputs[0])[0]  # Remove batch dimension

        # Expected layout is (N, 4 + num_classes). Some ONNX exports emit the
        # channel-first (4 + num_classes, N) layout instead; transpose it when
        # the shape is unambiguous so both layouts decode correctly.
        n_attrs = 4 + self.num_classes
        if (
            predictions.ndim == 2
            and predictions.shape[0] == n_attrs
            and predictions.shape[1] != n_attrs
        ):
            predictions = predictions.T

        boxes = predictions[:, :4]  # (N, 4) - x_center, y_center, width, height
        scores = predictions[:, 4:]  # (N, num_classes)

        # Best class and its score for each candidate
        class_ids = np.argmax(scores, axis=1)
        confidences = np.max(scores, axis=1)

        mask = confidences > self.conf_threshold
        return boxes[mask], class_ids[mask], confidences[mask]

    def _detect(
        self,
        outputs: list[np.ndarray],
        orig_shape: tuple[int, int],
    ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
        """Return the final (xyxy boxes, class ids, scores) arrays after NMS and clipping."""
        boxes, class_ids, confidences = self._filter_predictions(outputs)
        if len(boxes) == 0:
            return boxes, class_ids, confidences

        height, width = orig_shape[0], orig_shape[1]

        # Convert from xywh (center) to xyxy (corner) format
        half_w = boxes[:, 2] / 2
        half_h = boxes[:, 3] / 2
        boxes_xyxy = np.stack(
            [
                boxes[:, 0] - half_w,
                boxes[:, 1] - half_h,
                boxes[:, 0] + half_w,
                boxes[:, 1] + half_h,
            ],
            axis=1,
        )

        # Scale from model-input pixels to original-image pixels
        boxes_xyxy[:, [0, 2]] *= width / self.input_size
        boxes_xyxy[:, [1, 3]] *= height / self.input_size

        # NMS runs on the unclipped boxes so the kept set is unaffected by clipping
        keep = self.nms(boxes_xyxy, confidences, self.iou_threshold)
        boxes_xyxy = boxes_xyxy[keep]

        # Clip to the frame so no negative or out-of-image coordinates are emitted
        boxes_xyxy[:, [0, 2]] = np.clip(boxes_xyxy[:, [0, 2]], 0, width)
        boxes_xyxy[:, [1, 3]] = np.clip(boxes_xyxy[:, [1, 3]], 0, height)

        return boxes_xyxy, class_ids[keep], confidences[keep]

    def postprocess(
        self,
        outputs: list[np.ndarray],
        orig_shape: tuple[int, int]
    ) -> "list[BoundingBox]":
        """
        Post-process YOLO outputs to extract bounding boxes.

        Args:
            outputs: Raw model outputs; only ``outputs[0]`` is read. Its
                batch-stripped shape is (N, 4 + num_classes), or the
                transposed layout.
            orig_shape: Original image shape (height, width)

        Returns:
            List of detected bounding boxes in original-image pixels, clipped
            to the frame, ordered by descending confidence.
        """
        boxes, class_ids, confidences = self._detect(outputs, orig_shape)
        return [
            BoundingBox(
                x1=int(box[0]),
                y1=int(box[1]),
                x2=int(box[2]),
                y2=int(box[3]),
                cls_id=int(cls_id),
                conf=float(conf),
            )
            for box, cls_id, conf in zip(boxes, class_ids, confidences)
        ]

    def nms(
        self,
        boxes: np.ndarray,
        scores: np.ndarray,
        iou_threshold: float
    ) -> list[int]:
        """
        Non-Maximum Suppression (class-agnostic).

        Args:
            boxes: Bounding boxes in xyxy format (N, 4)
            scores: Confidence scores (N,)
            iou_threshold: Boxes whose IoU with a kept box is at or above
                this value are suppressed

        Returns:
            Indices of boxes to keep, as plain ints, highest score first
        """
        # Sort by confidence (descending)
        order = np.argsort(scores)[::-1]

        keep: list[int] = []
        while order.size > 0:
            # The best remaining box is always kept
            current = order[0]
            keep.append(int(current))

            if order.size == 1:
                break

            # Drop every remaining box that overlaps the kept one too much
            remaining = order[1:]
            ious = self.compute_iou(boxes[current], boxes[remaining])
            order = remaining[ious < iou_threshold]

        return keep

    def compute_iou(
        self,
        box: np.ndarray,
        boxes: np.ndarray
    ) -> np.ndarray:
        """
        Compute IoU between one box and multiple boxes.

        Args:
            box: Single box in xyxy format (4,)
            boxes: Multiple boxes in xyxy format (N, 4)

        Returns:
            IoU values (N,)
        """
        # Intersection rectangle; negative extents mean no overlap
        x1 = np.maximum(box[0], boxes[:, 0])
        y1 = np.maximum(box[1], boxes[:, 1])
        x2 = np.minimum(box[2], boxes[:, 2])
        y2 = np.minimum(box[3], boxes[:, 3])
        intersection = np.maximum(0, x2 - x1) * np.maximum(0, y2 - y1)

        # Union = sum of areas minus the shared part
        box_area = (box[2] - box[0]) * (box[3] - box[1])
        boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
        union = box_area + boxes_area - intersection

        # Epsilon guards against division by zero for degenerate boxes
        return intersection / (union + 1e-6)

    def __call__(
        self,
        images: list[np.ndarray],
        frame_ids: Optional[list[int]] = None,
    ) -> "list[TVFrameResult]":
        """
        Run detection on a batch of images.

        Args:
            images: List of BGR images
            frame_ids: Optional frame IDs; defaults to 0..len(images)-1

        Returns:
            One result per (image, frame_id) pair. A missing or empty frame
            yields a result with no boxes instead of raising.
        """
        if frame_ids is None:
            frame_ids = list(range(len(images)))

        results = []
        for image, frame_id in zip(images, frame_ids):
            # An undecodable frame cannot be resized; report it as empty
            if image is None or image.size == 0:
                results.append(
                    TVFrameResult(frame_id=frame_id, boxes=[], keypoints=[])
                )
                continue

            input_tensor = self.preprocess(image)

            outputs = self.session.run(
                self.output_names,
                {self.input_name: input_tensor}
            )

            boxes = self.postprocess(outputs, image.shape[:2])

            results.append(
                TVFrameResult(
                    frame_id=frame_id,
                    boxes=boxes,
                    keypoints=[],  # Empty for detection tasks
                )
            )

        return results