scorevision: push artifact
Browse files
- README.md +25 -20
- __pycache__/miner.cpython-312.pyc +0 -0
- chute_config.yml +1 -1
- class_names.txt +79 -0
- miner.py +82 -78
- model_type.json +1 -1
- weights.onnx +2 -2
README.md
CHANGED
|
@@ -1,14 +1,13 @@
|
|
| 1 |
---
|
| 2 |
tags:
|
| 3 |
- element_type:detect
|
| 4 |
-
- model:yolov11-
|
| 5 |
-
- object:
|
| 6 |
manako:
|
| 7 |
description: >
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
source: meaculpitt/Detect-Person
|
| 12 |
prompt_hints: null
|
| 13 |
input_payload:
|
| 14 |
- name: frame
|
|
@@ -17,26 +16,32 @@ manako:
|
|
| 17 |
output_payload:
|
| 18 |
- name: detections
|
| 19 |
type: detections
|
| 20 |
-
description: Bounding boxes for detected
|
| 21 |
-
evaluation_score: 0.
|
| 22 |
last_benchmark:
|
| 23 |
-
type:
|
| 24 |
-
ran_at:
|
| 25 |
result_path: null
|
| 26 |
---
|
| 27 |
|
| 28 |
-
# Detect-
|
| 29 |
|
| 30 |
-
|
| 31 |
|
| 32 |
| Metric | Value |
|
| 33 |
|--------|-------|
|
| 34 |
-
| mAP@50
|
| 35 |
-
|
|
| 36 |
-
|
|
| 37 |
-
|
|
| 38 |
-
|
|
| 39 |
-
|
|
| 40 |
|
| 41 |
-
|
| 42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
tags:
|
| 3 |
- element_type:detect
|
| 4 |
+
- model:yolov11-small
|
| 5 |
+
- object:vehicle
|
| 6 |
manako:
|
| 7 |
description: >
|
| 8 |
+
YOLO11s vehicle detector fine-tuned on COCO vehicles + BDD100K + VisDrone.
|
| 9 |
+
FP16 ONNX, 1280x1280 input. Trained R6: 59,870 images, 50 epochs.
|
| 10 |
+
source: meaculpitt/Detect-Vehicle
|
|
|
|
| 11 |
prompt_hints: null
|
| 12 |
input_payload:
|
| 13 |
- name: frame
|
|
|
|
| 16 |
output_payload:
|
| 17 |
- name: detections
|
| 18 |
type: detections
|
| 19 |
+
description: Bounding boxes for detected vehicles
|
| 20 |
+
evaluation_score: 0.7701
|
| 21 |
last_benchmark:
|
| 22 |
+
type: visdrone_val
|
| 23 |
+
ran_at: 2026-03-25T17:34:00+00:00
|
| 24 |
result_path: null
|
| 25 |
---
|
| 26 |
|
| 27 |
+
# Detect-Vehicle — SN44
|
| 28 |
|
| 29 |
+
YOLO11s fine-tuned for vehicle detection (car, bus, truck, motorcycle).
|
| 30 |
|
| 31 |
| Metric | Value |
|
| 32 |
|--------|-------|
|
| 33 |
+
| mAP@50 | 77.01% |
|
| 34 |
+
| Model | YOLO11s (FP16 ONNX) |
|
| 35 |
+
| Input size | 1280x1280 |
|
| 36 |
+
| Model size | 19.2 MB |
|
| 37 |
+
| Training data | COCO vehicles + BDD100K + VisDrone (59,870 images) |
|
| 38 |
+
| Baseline to beat | 40.72% |
|
| 39 |
|
| 40 |
+
## Classes
|
| 41 |
+
|
| 42 |
+
| Output ID | Class |
|
| 43 |
+
|-----------|-------|
|
| 44 |
+
| 0 | bus |
|
| 45 |
+
| 1 | car |
|
| 46 |
+
| 2 | truck |
|
| 47 |
+
| 3 | motorcycle |
|
__pycache__/miner.cpython-312.pyc
CHANGED
|
Binary files a/__pycache__/miner.cpython-312.pyc and b/__pycache__/miner.cpython-312.pyc differ
|
|
|
chute_config.yml
CHANGED
|
@@ -2,7 +2,7 @@ Image:
|
|
| 2 |
from_base: parachutes/python:3.12
|
| 3 |
run_command:
|
| 4 |
- pip install --upgrade setuptools wheel
|
| 5 |
-
- pip install 'numpy>=1.23' 'onnxruntime-gpu>=1.16' 'nvidia-cudnn-cu12' 'nvidia-cublas-cu12' 'opencv-python-headless>=4.7' 'pillow>=9.5' 'huggingface_hub>=0.19.4' 'pydantic>=2.0' 'pyyaml>=6.0' 'aiohttp>=3.9'
|
| 6 |
|
| 7 |
NodeSelector:
|
| 8 |
gpu_count: 1
|
|
|
|
| 2 |
from_base: parachutes/python:3.12
|
| 3 |
run_command:
|
| 4 |
- pip install --upgrade setuptools wheel
|
| 5 |
+
- pip install 'numpy>=1.23' 'onnxruntime-gpu>=1.16' 'nvidia-cudnn-cu12' 'nvidia-cublas-cu12' 'opencv-python-headless>=4.7' 'pillow>=9.5' 'huggingface_hub>=0.19.4' 'pydantic>=2.0' 'pyyaml>=6.0' 'aiohttp>=3.9' 'ensemble-boxes>=1.0'
|
| 6 |
|
| 7 |
NodeSelector:
|
| 8 |
gpu_count: 1
|
class_names.txt
CHANGED
|
@@ -1 +1,80 @@
|
|
| 1 |
person
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
person
|
| 2 |
+
bicycle
|
| 3 |
+
car
|
| 4 |
+
motorcycle
|
| 5 |
+
airplane
|
| 6 |
+
bus
|
| 7 |
+
train
|
| 8 |
+
truck
|
| 9 |
+
boat
|
| 10 |
+
traffic light
|
| 11 |
+
fire hydrant
|
| 12 |
+
stop sign
|
| 13 |
+
parking meter
|
| 14 |
+
bench
|
| 15 |
+
bird
|
| 16 |
+
cat
|
| 17 |
+
dog
|
| 18 |
+
horse
|
| 19 |
+
sheep
|
| 20 |
+
cow
|
| 21 |
+
elephant
|
| 22 |
+
bear
|
| 23 |
+
zebra
|
| 24 |
+
giraffe
|
| 25 |
+
backpack
|
| 26 |
+
umbrella
|
| 27 |
+
handbag
|
| 28 |
+
tie
|
| 29 |
+
suitcase
|
| 30 |
+
frisbee
|
| 31 |
+
skis
|
| 32 |
+
snowboard
|
| 33 |
+
sports ball
|
| 34 |
+
kite
|
| 35 |
+
baseball bat
|
| 36 |
+
baseball glove
|
| 37 |
+
skateboard
|
| 38 |
+
surfboard
|
| 39 |
+
tennis racket
|
| 40 |
+
bottle
|
| 41 |
+
wine glass
|
| 42 |
+
cup
|
| 43 |
+
fork
|
| 44 |
+
knife
|
| 45 |
+
spoon
|
| 46 |
+
bowl
|
| 47 |
+
banana
|
| 48 |
+
apple
|
| 49 |
+
sandwich
|
| 50 |
+
orange
|
| 51 |
+
broccoli
|
| 52 |
+
carrot
|
| 53 |
+
hot dog
|
| 54 |
+
pizza
|
| 55 |
+
donut
|
| 56 |
+
cake
|
| 57 |
+
chair
|
| 58 |
+
couch
|
| 59 |
+
potted plant
|
| 60 |
+
bed
|
| 61 |
+
dining table
|
| 62 |
+
toilet
|
| 63 |
+
tv
|
| 64 |
+
laptop
|
| 65 |
+
mouse
|
| 66 |
+
remote
|
| 67 |
+
keyboard
|
| 68 |
+
cell phone
|
| 69 |
+
microwave
|
| 70 |
+
oven
|
| 71 |
+
toaster
|
| 72 |
+
sink
|
| 73 |
+
refrigerator
|
| 74 |
+
book
|
| 75 |
+
clock
|
| 76 |
+
vase
|
| 77 |
+
scissors
|
| 78 |
+
teddy bear
|
| 79 |
+
hair drier
|
| 80 |
+
toothbrush
|
miner.py
CHANGED
|
@@ -1,7 +1,12 @@
|
|
| 1 |
"""
|
| 2 |
-
Score Vision SN44 —
|
| 3 |
-
TTA (3 augmentations) + WBF fusion.
|
| 4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
"""
|
| 6 |
|
| 7 |
from pathlib import Path
|
|
@@ -14,6 +19,15 @@ from numpy import ndarray
|
|
| 14 |
from pydantic import BaseModel
|
| 15 |
from ensemble_boxes import weighted_boxes_fusion
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
WBF_IOU_THR = 0.55
|
| 18 |
WBF_SKIP_THR = 0.0001
|
| 19 |
TTA_SCALE = 1.2
|
|
@@ -37,94 +51,78 @@ class TVFrameResult(BaseModel):
|
|
| 37 |
class Miner:
|
| 38 |
def __init__(self, path_hf_repo: Path) -> None:
|
| 39 |
self.path_hf_repo = path_hf_repo
|
| 40 |
-
self.class_names = ['person']
|
| 41 |
self.session = ort.InferenceSession(
|
| 42 |
str(path_hf_repo / "weights.onnx"),
|
| 43 |
providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
|
| 44 |
)
|
| 45 |
self.input_name = self.session.get_inputs()[0].name
|
| 46 |
-
|
| 47 |
-
self.
|
| 48 |
-
self.input_w = int(input_shape[3])
|
| 49 |
-
self.conf_threshold = 0.50
|
| 50 |
-
self.iou_threshold = 0.45
|
| 51 |
|
| 52 |
def __repr__(self) -> str:
|
| 53 |
-
return f"
|
| 54 |
-
|
| 55 |
-
def
|
| 56 |
-
h, w =
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
"""Decode ONNX output to (boxes_xyxy, confs, cls_ids) in original image coords."""
|
| 66 |
pred = raw[0]
|
| 67 |
-
if pred.ndim != 2:
|
| 68 |
-
return np.empty((0, 4)), np.empty(0), np.empty(0, dtype=int)
|
| 69 |
if pred.shape[0] < pred.shape[1]:
|
| 70 |
-
pred = pred.
|
| 71 |
-
|
| 72 |
-
if pred.shape[1] < 5:
|
| 73 |
-
return np.empty((0, 4)), np.empty(0), np.empty(0, dtype=int)
|
| 74 |
|
| 75 |
-
|
| 76 |
cls_scores = pred[:, 4:]
|
| 77 |
-
if cls_scores.shape[1] == 0:
|
| 78 |
-
return np.empty((0, 4)), np.empty(0), np.empty(0, dtype=int)
|
| 79 |
|
| 80 |
cls_ids = np.argmax(cls_scores, axis=1)
|
| 81 |
confs = np.max(cls_scores, axis=1)
|
| 82 |
-
|
| 83 |
|
| 84 |
-
|
| 85 |
-
confs = confs[keep]
|
| 86 |
-
cls_ids = cls_ids[keep]
|
| 87 |
-
|
| 88 |
-
if boxes.shape[0] == 0:
|
| 89 |
return np.empty((0, 4)), np.empty(0), np.empty(0, dtype=int)
|
| 90 |
|
| 91 |
-
|
| 92 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
out_boxes = np.stack([x1, y1, x2, y2], axis=1)
|
| 100 |
|
| 101 |
-
return
|
| 102 |
|
| 103 |
def _run_single_pass(self, image_bgr: ndarray) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
|
|
|
|
| 104 |
orig_h, orig_w = image_bgr.shape[:2]
|
| 105 |
-
inp,
|
| 106 |
raw = self.session.run(None, {self.input_name: inp})[0]
|
| 107 |
-
return self._decode_raw(raw,
|
| 108 |
-
|
| 109 |
-
def _nms_dets(self, boxes: np.ndarray, confs: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
|
| 110 |
-
"""Simple NMS on boxes+confs."""
|
| 111 |
-
if len(boxes) == 0:
|
| 112 |
-
return boxes, confs
|
| 113 |
-
x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
|
| 114 |
-
areas = (x2 - x1) * (y2 - y1)
|
| 115 |
-
order = confs.argsort()[::-1]
|
| 116 |
-
keep = []
|
| 117 |
-
while len(order):
|
| 118 |
-
i = order[0]
|
| 119 |
-
keep.append(int(i))
|
| 120 |
-
xx1 = np.maximum(x1[i], x1[order[1:]])
|
| 121 |
-
yy1 = np.maximum(y1[i], y1[order[1:]])
|
| 122 |
-
xx2 = np.minimum(x2[i], x2[order[1:]])
|
| 123 |
-
yy2 = np.minimum(y2[i], y2[order[1:]])
|
| 124 |
-
inter = np.maximum(0, xx2 - xx1) * np.maximum(0, yy2 - yy1)
|
| 125 |
-
iou = inter / (areas[i] + areas[order[1:]] - inter + 1e-7)
|
| 126 |
-
order = order[1:][iou <= self.iou_threshold]
|
| 127 |
-
return boxes[keep], confs[keep]
|
| 128 |
|
| 129 |
def _infer_single(self, image_bgr: ndarray) -> list[BoundingBox]:
|
| 130 |
orig_h, orig_w = image_bgr.shape[:2]
|
|
@@ -136,47 +134,55 @@ class Miner:
|
|
| 136 |
# ── TTA pass 1: original ──
|
| 137 |
boxes, confs, cls_ids = self._run_single_pass(image_bgr)
|
| 138 |
if len(boxes):
|
|
|
|
|
|
|
|
|
|
| 139 |
norm_boxes = boxes.copy()
|
| 140 |
norm_boxes[:, [0, 2]] /= orig_w
|
| 141 |
norm_boxes[:, [1, 3]] /= orig_h
|
| 142 |
norm_boxes = np.clip(norm_boxes, 0, 1)
|
| 143 |
all_boxes_list.append(norm_boxes)
|
| 144 |
all_scores_list.append(confs)
|
| 145 |
-
all_labels_list.append(
|
| 146 |
|
| 147 |
# ── TTA pass 2: horizontal flip ──
|
| 148 |
flipped = cv2.flip(image_bgr, 1)
|
| 149 |
boxes_f, confs_f, cls_ids_f = self._run_single_pass(flipped)
|
| 150 |
if len(boxes_f):
|
|
|
|
| 151 |
boxes_f[:, 0], boxes_f[:, 2] = orig_w - boxes_f[:, 2], orig_w - boxes_f[:, 0]
|
|
|
|
| 152 |
norm_boxes_f = boxes_f.copy()
|
| 153 |
norm_boxes_f[:, [0, 2]] /= orig_w
|
| 154 |
norm_boxes_f[:, [1, 3]] /= orig_h
|
| 155 |
norm_boxes_f = np.clip(norm_boxes_f, 0, 1)
|
| 156 |
all_boxes_list.append(norm_boxes_f)
|
| 157 |
all_scores_list.append(confs_f)
|
| 158 |
-
all_labels_list.append(
|
| 159 |
|
| 160 |
-
# ── TTA pass 3: scale 1.2x center crop ──
|
| 161 |
scaled_h, scaled_w = int(orig_h * TTA_SCALE), int(orig_w * TTA_SCALE)
|
| 162 |
scaled = cv2.resize(image_bgr, (scaled_w, scaled_h), interpolation=cv2.INTER_LINEAR)
|
|
|
|
| 163 |
y_off = (scaled_h - orig_h) // 2
|
| 164 |
x_off = (scaled_w - orig_w) // 2
|
| 165 |
cropped = scaled[y_off:y_off + orig_h, x_off:x_off + orig_w]
|
| 166 |
boxes_s, confs_s, cls_ids_s = self._run_single_pass(cropped)
|
| 167 |
if len(boxes_s):
|
|
|
|
| 168 |
boxes_s[:, 0] = (boxes_s[:, 0] + x_off) / TTA_SCALE
|
| 169 |
boxes_s[:, 1] = (boxes_s[:, 1] + y_off) / TTA_SCALE
|
| 170 |
boxes_s[:, 2] = (boxes_s[:, 2] + x_off) / TTA_SCALE
|
| 171 |
boxes_s[:, 3] = (boxes_s[:, 3] + y_off) / TTA_SCALE
|
| 172 |
boxes_s = np.clip(boxes_s, 0, [[orig_w, orig_h, orig_w, orig_h]])
|
|
|
|
| 173 |
norm_boxes_s = boxes_s.copy()
|
| 174 |
norm_boxes_s[:, [0, 2]] /= orig_w
|
| 175 |
norm_boxes_s[:, [1, 3]] /= orig_h
|
| 176 |
norm_boxes_s = np.clip(norm_boxes_s, 0, 1)
|
| 177 |
all_boxes_list.append(norm_boxes_s)
|
| 178 |
all_scores_list.append(confs_s)
|
| 179 |
-
all_labels_list.append(
|
| 180 |
|
| 181 |
if not all_boxes_list:
|
| 182 |
return []
|
|
@@ -217,11 +223,9 @@ class Miner:
|
|
| 217 |
for idx, image in enumerate(batch_images):
|
| 218 |
boxes = self._infer_single(image)
|
| 219 |
keypoints = [(0, 0) for _ in range(max(0, int(n_keypoints)))]
|
| 220 |
-
results.append(
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
)
|
| 226 |
-
)
|
| 227 |
return results
|
|
|
|
| 1 |
"""
|
| 2 |
+
Score Vision SN44 — VehicleDetect miner. v3 (2026-03-26).
|
| 3 |
+
TTA (3 augmentations) + WBF fusion. Letterbox preprocessing.
|
| 4 |
+
|
| 5 |
+
Model: YOLO11s ONNX, 4 classes trained as:
|
| 6 |
+
0 = car, 1 = bus, 2 = truck, 3 = motorcycle
|
| 7 |
+
|
| 8 |
+
Official submission order (remapped in MODEL_TO_OUT):
|
| 9 |
+
0 = bus, 1 = car, 2 = truck, 3 = motorcycle
|
| 10 |
"""
|
| 11 |
|
| 12 |
from pathlib import Path
|
|
|
|
| 19 |
from pydantic import BaseModel
|
| 20 |
from ensemble_boxes import weighted_boxes_fusion
|
| 21 |
|
| 22 |
+
|
| 23 |
+
# ── Model class index → submission class index ───────────────────────────────
|
| 24 |
+
MODEL_TO_OUT: dict[int, int] = {0: 1, 1: 0, 2: 2, 3: 3}
|
| 25 |
+
OUT_TO_MODEL: dict[int, int] = {v: k for k, v in MODEL_TO_OUT.items()}
|
| 26 |
+
OUT_NAMES = ["bus", "car", "truck", "motorcycle"]
|
| 27 |
+
|
| 28 |
+
IMG_SIZE = 1280
|
| 29 |
+
CONF_THRESH = 0.55
|
| 30 |
+
IOU_THRESH = 0.45
|
| 31 |
WBF_IOU_THR = 0.55
|
| 32 |
WBF_SKIP_THR = 0.0001
|
| 33 |
TTA_SCALE = 1.2
|
|
|
|
| 51 |
class Miner:
|
| 52 |
def __init__(self, path_hf_repo: Path) -> None:
|
| 53 |
self.path_hf_repo = path_hf_repo
|
|
|
|
| 54 |
self.session = ort.InferenceSession(
|
| 55 |
str(path_hf_repo / "weights.onnx"),
|
| 56 |
providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
|
| 57 |
)
|
| 58 |
self.input_name = self.session.get_inputs()[0].name
|
| 59 |
+
self.conf_threshold = CONF_THRESH
|
| 60 |
+
self.iou_threshold = IOU_THRESH
|
|
|
|
|
|
|
|
|
|
| 61 |
|
| 62 |
def __repr__(self) -> str:
|
| 63 |
+
return f"VehicleDetect Miner TTA+WBF session={type(self.session).__name__}"
|
| 64 |
+
|
| 65 |
+
def _letterbox(self, img: ndarray) -> tuple[np.ndarray, float, int, int]:
|
| 66 |
+
h, w = img.shape[:2]
|
| 67 |
+
r = min(IMG_SIZE / h, IMG_SIZE / w)
|
| 68 |
+
new_w, new_h = int(round(w * r)), int(round(h * r))
|
| 69 |
+
img_r = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
|
| 70 |
+
dw, dh = IMG_SIZE - new_w, IMG_SIZE - new_h
|
| 71 |
+
pad_l, pad_t = dw // 2, dh // 2
|
| 72 |
+
img_p = cv2.copyMakeBorder(
|
| 73 |
+
img_r, pad_t, dh - pad_t, pad_l, dw - pad_l,
|
| 74 |
+
cv2.BORDER_CONSTANT, value=(114, 114, 114),
|
| 75 |
+
)
|
| 76 |
+
return img_p, r, pad_l, pad_t
|
| 77 |
+
|
| 78 |
+
def _preprocess(self, image_bgr: ndarray) -> tuple[np.ndarray, float, int, int]:
|
| 79 |
+
img_p, ratio, pad_l, pad_t = self._letterbox(image_bgr)
|
| 80 |
+
img_rgb = cv2.cvtColor(img_p, cv2.COLOR_BGR2RGB)
|
| 81 |
+
inp = img_rgb.astype(np.float32) / 255.0
|
| 82 |
+
inp = np.ascontiguousarray(inp.transpose(2, 0, 1)[np.newaxis])
|
| 83 |
+
return inp, ratio, pad_l, pad_t
|
| 84 |
+
|
| 85 |
+
def _decode_raw(self, raw: np.ndarray, ratio: float, pad_l: int, pad_t: int,
|
| 86 |
+
orig_w: int, orig_h: int) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
|
| 87 |
"""Decode ONNX output to (boxes_xyxy, confs, cls_ids) in original image coords."""
|
| 88 |
pred = raw[0]
|
|
|
|
|
|
|
| 89 |
if pred.shape[0] < pred.shape[1]:
|
| 90 |
+
pred = pred.T
|
|
|
|
|
|
|
|
|
|
| 91 |
|
| 92 |
+
bboxes_cx = pred[:, :4]
|
| 93 |
cls_scores = pred[:, 4:]
|
|
|
|
|
|
|
| 94 |
|
| 95 |
cls_ids = np.argmax(cls_scores, axis=1)
|
| 96 |
confs = np.max(cls_scores, axis=1)
|
| 97 |
+
mask = confs >= self.conf_threshold
|
| 98 |
|
| 99 |
+
if not mask.any():
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
return np.empty((0, 4)), np.empty(0), np.empty(0, dtype=int)
|
| 101 |
|
| 102 |
+
bboxes_cx = bboxes_cx[mask]
|
| 103 |
+
confs = confs[mask]
|
| 104 |
+
cls_ids = cls_ids[mask]
|
| 105 |
+
|
| 106 |
+
cx, cy, bw, bh = bboxes_cx[:, 0], bboxes_cx[:, 1], bboxes_cx[:, 2], bboxes_cx[:, 3]
|
| 107 |
+
lx1 = cx - bw / 2
|
| 108 |
+
ly1 = cy - bh / 2
|
| 109 |
+
lx2 = cx + bw / 2
|
| 110 |
+
ly2 = cy + bh / 2
|
| 111 |
|
| 112 |
+
x1 = np.clip((lx1 - pad_l) / ratio, 0, orig_w)
|
| 113 |
+
y1 = np.clip((ly1 - pad_t) / ratio, 0, orig_h)
|
| 114 |
+
x2 = np.clip((lx2 - pad_l) / ratio, 0, orig_w)
|
| 115 |
+
y2 = np.clip((ly2 - pad_t) / ratio, 0, orig_h)
|
| 116 |
+
boxes = np.stack([x1, y1, x2, y2], axis=1)
|
|
|
|
| 117 |
|
| 118 |
+
return boxes, confs, cls_ids
|
| 119 |
|
| 120 |
def _run_single_pass(self, image_bgr: ndarray) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
|
| 121 |
+
"""Run one inference pass, return (boxes_xyxy, confs, cls_ids) in original coords."""
|
| 122 |
orig_h, orig_w = image_bgr.shape[:2]
|
| 123 |
+
inp, ratio, pad_l, pad_t = self._preprocess(image_bgr)
|
| 124 |
raw = self.session.run(None, {self.input_name: inp})[0]
|
| 125 |
+
return self._decode_raw(raw, ratio, pad_l, pad_t, orig_w, orig_h)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
|
| 127 |
def _infer_single(self, image_bgr: ndarray) -> list[BoundingBox]:
|
| 128 |
orig_h, orig_w = image_bgr.shape[:2]
|
|
|
|
| 134 |
# ── TTA pass 1: original ──
|
| 135 |
boxes, confs, cls_ids = self._run_single_pass(image_bgr)
|
| 136 |
if len(boxes):
|
| 137 |
+
# Remap to output class IDs for WBF
|
| 138 |
+
out_cls = np.array([MODEL_TO_OUT[int(c)] for c in cls_ids])
|
| 139 |
+
# Normalize to [0,1]
|
| 140 |
norm_boxes = boxes.copy()
|
| 141 |
norm_boxes[:, [0, 2]] /= orig_w
|
| 142 |
norm_boxes[:, [1, 3]] /= orig_h
|
| 143 |
norm_boxes = np.clip(norm_boxes, 0, 1)
|
| 144 |
all_boxes_list.append(norm_boxes)
|
| 145 |
all_scores_list.append(confs)
|
| 146 |
+
all_labels_list.append(out_cls)
|
| 147 |
|
| 148 |
# ── TTA pass 2: horizontal flip ──
|
| 149 |
flipped = cv2.flip(image_bgr, 1)
|
| 150 |
boxes_f, confs_f, cls_ids_f = self._run_single_pass(flipped)
|
| 151 |
if len(boxes_f):
|
| 152 |
+
# Flip x coords back
|
| 153 |
boxes_f[:, 0], boxes_f[:, 2] = orig_w - boxes_f[:, 2], orig_w - boxes_f[:, 0]
|
| 154 |
+
out_cls_f = np.array([MODEL_TO_OUT[int(c)] for c in cls_ids_f])
|
| 155 |
norm_boxes_f = boxes_f.copy()
|
| 156 |
norm_boxes_f[:, [0, 2]] /= orig_w
|
| 157 |
norm_boxes_f[:, [1, 3]] /= orig_h
|
| 158 |
norm_boxes_f = np.clip(norm_boxes_f, 0, 1)
|
| 159 |
all_boxes_list.append(norm_boxes_f)
|
| 160 |
all_scores_list.append(confs_f)
|
| 161 |
+
all_labels_list.append(out_cls_f)
|
| 162 |
|
| 163 |
+
# ── TTA pass 3: scale 1.2x (center crop to original size) ──
|
| 164 |
scaled_h, scaled_w = int(orig_h * TTA_SCALE), int(orig_w * TTA_SCALE)
|
| 165 |
scaled = cv2.resize(image_bgr, (scaled_w, scaled_h), interpolation=cv2.INTER_LINEAR)
|
| 166 |
+
# Center crop back to original size
|
| 167 |
y_off = (scaled_h - orig_h) // 2
|
| 168 |
x_off = (scaled_w - orig_w) // 2
|
| 169 |
cropped = scaled[y_off:y_off + orig_h, x_off:x_off + orig_w]
|
| 170 |
boxes_s, confs_s, cls_ids_s = self._run_single_pass(cropped)
|
| 171 |
if len(boxes_s):
|
| 172 |
+
# Map cropped coords back to original: offset + scale
|
| 173 |
boxes_s[:, 0] = (boxes_s[:, 0] + x_off) / TTA_SCALE
|
| 174 |
boxes_s[:, 1] = (boxes_s[:, 1] + y_off) / TTA_SCALE
|
| 175 |
boxes_s[:, 2] = (boxes_s[:, 2] + x_off) / TTA_SCALE
|
| 176 |
boxes_s[:, 3] = (boxes_s[:, 3] + y_off) / TTA_SCALE
|
| 177 |
boxes_s = np.clip(boxes_s, 0, [[orig_w, orig_h, orig_w, orig_h]])
|
| 178 |
+
out_cls_s = np.array([MODEL_TO_OUT[int(c)] for c in cls_ids_s])
|
| 179 |
norm_boxes_s = boxes_s.copy()
|
| 180 |
norm_boxes_s[:, [0, 2]] /= orig_w
|
| 181 |
norm_boxes_s[:, [1, 3]] /= orig_h
|
| 182 |
norm_boxes_s = np.clip(norm_boxes_s, 0, 1)
|
| 183 |
all_boxes_list.append(norm_boxes_s)
|
| 184 |
all_scores_list.append(confs_s)
|
| 185 |
+
all_labels_list.append(out_cls_s)
|
| 186 |
|
| 187 |
if not all_boxes_list:
|
| 188 |
return []
|
|
|
|
| 223 |
for idx, image in enumerate(batch_images):
|
| 224 |
boxes = self._infer_single(image)
|
| 225 |
keypoints = [(0, 0) for _ in range(max(0, int(n_keypoints)))]
|
| 226 |
+
results.append(TVFrameResult(
|
| 227 |
+
frame_id=offset + idx,
|
| 228 |
+
boxes=boxes,
|
| 229 |
+
keypoints=keypoints,
|
| 230 |
+
))
|
|
|
|
|
|
|
| 231 |
return results
|
model_type.json
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
{"task_type": "object-detection", "model_type": "yolov11-
|
|
|
|
| 1 |
+
{"task_type": "object-detection", "model_type": "yolov11-small", "deploy": "2026-03-26T07:43Z"}
|
weights.onnx
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e3916408ec21f8c94358c18914f922814770b78557e52fe17ff7a9ee74339a5a
|
| 3 |
+
size 19272252
|