scorevision: push artifact
Browse files- README.md +25 -20
- __pycache__/miner.cpython-312.pyc +0 -0
- class_names.txt +79 -0
- miner.py +164 -108
- model_type.json +1 -1
- weights.onnx +2 -2
README.md
CHANGED
|
@@ -1,14 +1,13 @@
|
|
| 1 |
---
|
| 2 |
tags:
|
| 3 |
- element_type:detect
|
| 4 |
-
- model:yolov11-
|
| 5 |
-
- object:
|
| 6 |
manako:
|
| 7 |
description: >
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
source: meaculpitt/Detect-Person
|
| 12 |
prompt_hints: null
|
| 13 |
input_payload:
|
| 14 |
- name: frame
|
|
@@ -17,26 +16,32 @@ manako:
|
|
| 17 |
output_payload:
|
| 18 |
- name: detections
|
| 19 |
type: detections
|
| 20 |
-
description: Bounding boxes for detected
|
| 21 |
-
evaluation_score: 0.
|
| 22 |
last_benchmark:
|
| 23 |
-
type:
|
| 24 |
-
ran_at:
|
| 25 |
result_path: null
|
| 26 |
---
|
| 27 |
|
| 28 |
-
# Detect-
|
| 29 |
|
| 30 |
-
|
| 31 |
|
| 32 |
| Metric | Value |
|
| 33 |
|--------|-------|
|
| 34 |
-
| mAP@50
|
| 35 |
-
|
|
| 36 |
-
|
|
| 37 |
-
|
|
| 38 |
-
|
|
| 39 |
-
|
|
| 40 |
|
| 41 |
-
|
| 42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
tags:
|
| 3 |
- element_type:detect
|
| 4 |
+
- model:yolov11-small
|
| 5 |
+
- object:vehicle
|
| 6 |
manako:
|
| 7 |
description: >
|
| 8 |
+
YOLO11s vehicle detector fine-tuned on COCO vehicles + BDD100K + VisDrone.
|
| 9 |
+
FP16 ONNX, 1280x1280 input. Trained R6: 59,870 images, 50 epochs.
|
| 10 |
+
source: meaculpitt/Detect-Vehicle
|
|
|
|
| 11 |
prompt_hints: null
|
| 12 |
input_payload:
|
| 13 |
- name: frame
|
|
|
|
| 16 |
output_payload:
|
| 17 |
- name: detections
|
| 18 |
type: detections
|
| 19 |
+
description: Bounding boxes for detected vehicles
|
| 20 |
+
evaluation_score: 0.7701
|
| 21 |
last_benchmark:
|
| 22 |
+
type: visdrone_val
|
| 23 |
+
ran_at: 2026-03-25T17:34:00+00:00
|
| 24 |
result_path: null
|
| 25 |
---
|
| 26 |
|
| 27 |
+
# Detect-Vehicle — SN44
|
| 28 |
|
| 29 |
+
YOLO11s fine-tuned for vehicle detection (car, bus, truck, motorcycle).
|
| 30 |
|
| 31 |
| Metric | Value |
|
| 32 |
|--------|-------|
|
| 33 |
+
| mAP@50 | 77.01% |
|
| 34 |
+
| Model | YOLO11s (FP16 ONNX) |
|
| 35 |
+
| Input size | 1280x1280 |
|
| 36 |
+
| Model size | 19.2 MB |
|
| 37 |
+
| Training data | COCO vehicles + BDD100K + VisDrone (59,870 images) |
|
| 38 |
+
| Baseline to beat | 40.72% |
|
| 39 |
|
| 40 |
+
## Classes
|
| 41 |
+
|
| 42 |
+
| Model class ID (before remapping to submission order) | Class |
|
| 43 |
+
|-----------|-------|
|
| 44 |
+
| 0 | car |
|
| 45 |
+
| 1 | bus |
|
| 46 |
+
| 2 | truck |
|
| 47 |
+
| 3 | motorcycle |
|
__pycache__/miner.cpython-312.pyc
CHANGED
|
Binary files a/__pycache__/miner.cpython-312.pyc and b/__pycache__/miner.cpython-312.pyc differ
|
|
|
class_names.txt
CHANGED
|
@@ -1 +1,80 @@
|
|
| 1 |
person
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
person
|
| 2 |
+
bicycle
|
| 3 |
+
car
|
| 4 |
+
motorcycle
|
| 5 |
+
airplane
|
| 6 |
+
bus
|
| 7 |
+
train
|
| 8 |
+
truck
|
| 9 |
+
boat
|
| 10 |
+
traffic light
|
| 11 |
+
fire hydrant
|
| 12 |
+
stop sign
|
| 13 |
+
parking meter
|
| 14 |
+
bench
|
| 15 |
+
bird
|
| 16 |
+
cat
|
| 17 |
+
dog
|
| 18 |
+
horse
|
| 19 |
+
sheep
|
| 20 |
+
cow
|
| 21 |
+
elephant
|
| 22 |
+
bear
|
| 23 |
+
zebra
|
| 24 |
+
giraffe
|
| 25 |
+
backpack
|
| 26 |
+
umbrella
|
| 27 |
+
handbag
|
| 28 |
+
tie
|
| 29 |
+
suitcase
|
| 30 |
+
frisbee
|
| 31 |
+
skis
|
| 32 |
+
snowboard
|
| 33 |
+
sports ball
|
| 34 |
+
kite
|
| 35 |
+
baseball bat
|
| 36 |
+
baseball glove
|
| 37 |
+
skateboard
|
| 38 |
+
surfboard
|
| 39 |
+
tennis racket
|
| 40 |
+
bottle
|
| 41 |
+
wine glass
|
| 42 |
+
cup
|
| 43 |
+
fork
|
| 44 |
+
knife
|
| 45 |
+
spoon
|
| 46 |
+
bowl
|
| 47 |
+
banana
|
| 48 |
+
apple
|
| 49 |
+
sandwich
|
| 50 |
+
orange
|
| 51 |
+
broccoli
|
| 52 |
+
carrot
|
| 53 |
+
hot dog
|
| 54 |
+
pizza
|
| 55 |
+
donut
|
| 56 |
+
cake
|
| 57 |
+
chair
|
| 58 |
+
couch
|
| 59 |
+
potted plant
|
| 60 |
+
bed
|
| 61 |
+
dining table
|
| 62 |
+
toilet
|
| 63 |
+
tv
|
| 64 |
+
laptop
|
| 65 |
+
mouse
|
| 66 |
+
remote
|
| 67 |
+
keyboard
|
| 68 |
+
cell phone
|
| 69 |
+
microwave
|
| 70 |
+
oven
|
| 71 |
+
toaster
|
| 72 |
+
sink
|
| 73 |
+
refrigerator
|
| 74 |
+
book
|
| 75 |
+
clock
|
| 76 |
+
vase
|
| 77 |
+
scissors
|
| 78 |
+
teddy bear
|
| 79 |
+
hair drier
|
| 80 |
+
toothbrush
|
miner.py
CHANGED
|
@@ -1,3 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from pathlib import Path
|
| 2 |
import math
|
| 3 |
|
|
@@ -6,6 +17,20 @@ import numpy as np
|
|
| 6 |
import onnxruntime as ort
|
| 7 |
from numpy import ndarray
|
| 8 |
from pydantic import BaseModel
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
|
| 11 |
class BoundingBox(BaseModel):
|
|
@@ -24,135 +49,168 @@ class TVFrameResult(BaseModel):
|
|
| 24 |
|
| 25 |
|
| 26 |
class Miner:
|
| 27 |
-
"""
|
| 28 |
-
Auto-generated by subnet_bridge from a Manako element repo.
|
| 29 |
-
This miner is intentionally self-contained for chute import restrictions.
|
| 30 |
-
"""
|
| 31 |
-
|
| 32 |
def __init__(self, path_hf_repo: Path) -> None:
|
| 33 |
self.path_hf_repo = path_hf_repo
|
| 34 |
-
self.class_names = ['person']
|
| 35 |
self.session = ort.InferenceSession(
|
| 36 |
str(path_hf_repo / "weights.onnx"),
|
| 37 |
providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
|
| 38 |
)
|
| 39 |
self.input_name = self.session.get_inputs()[0].name
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
self.input_h = int(input_shape[2])
|
| 43 |
-
self.input_w = int(input_shape[3])
|
| 44 |
-
self.conf_threshold = 0.50 # sweep-optimised: composite 0.5379 at 0.50 vs 0.5045 at 0.70
|
| 45 |
-
self.iou_threshold = 0.45
|
| 46 |
|
| 47 |
def __repr__(self) -> str:
|
| 48 |
-
return f"
|
| 49 |
-
|
| 50 |
-
def
|
| 51 |
-
h, w =
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
pred = raw[0]
|
| 63 |
-
if pred.ndim != 2:
|
| 64 |
-
raise ValueError(f"Unexpected prediction shape: {raw.shape}")
|
| 65 |
if pred.shape[0] < pred.shape[1]:
|
| 66 |
-
pred = pred.
|
| 67 |
-
return pred
|
| 68 |
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
return []
|
| 72 |
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
keep = []
|
| 77 |
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
keep.append(i)
|
| 81 |
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
yy2 = np.minimum(boxes[i, 3], boxes[order[1:], 3])
|
| 86 |
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
|
|
|
|
|
|
| 90 |
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
|
|
|
| 95 |
|
| 96 |
-
|
| 97 |
-
order = order[remaining + 1]
|
| 98 |
|
| 99 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
|
| 101 |
def _infer_single(self, image_bgr: ndarray) -> list[BoundingBox]:
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
boxes
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
return []
|
| 114 |
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
confs = confs[keep]
|
| 121 |
-
cls_ids = cls_ids[keep]
|
| 122 |
|
| 123 |
-
if
|
| 124 |
return []
|
| 125 |
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
dets: list[tuple[float, float, float, float, float, int]] = []
|
| 130 |
-
for i in range(boxes.shape[0]):
|
| 131 |
-
cx, cy, bw, bh = boxes[i].tolist()
|
| 132 |
-
x1 = (cx - bw / 2.0) * sx
|
| 133 |
-
y1 = (cy - bh / 2.0) * sy
|
| 134 |
-
x2 = (cx + bw / 2.0) * sx
|
| 135 |
-
y2 = (cy + bh / 2.0) * sy
|
| 136 |
-
dets.append((x1, y1, x2, y2, float(confs[i]), int(cls_ids[i])))
|
| 137 |
-
|
| 138 |
-
dets = self._nms(dets)
|
| 139 |
|
| 140 |
out_boxes: list[BoundingBox] = []
|
| 141 |
-
for
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
y2=iy2,
|
| 152 |
-
cls_id=cls_id,
|
| 153 |
-
conf=max(0.0, min(1.0, conf)),
|
| 154 |
-
)
|
| 155 |
-
)
|
| 156 |
return out_boxes
|
| 157 |
|
| 158 |
def predict_batch(
|
|
@@ -165,11 +223,9 @@ class Miner:
|
|
| 165 |
for idx, image in enumerate(batch_images):
|
| 166 |
boxes = self._infer_single(image)
|
| 167 |
keypoints = [(0, 0) for _ in range(max(0, int(n_keypoints)))]
|
| 168 |
-
results.append(
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
)
|
| 174 |
-
)
|
| 175 |
return results
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Score Vision SN44 — VehicleDetect miner. v3 (2026-03-26).
|
| 3 |
+
TTA (3 augmentations) + WBF fusion. Letterbox preprocessing.
|
| 4 |
+
|
| 5 |
+
Model: YOLO11s ONNX, 4 classes trained as:
|
| 6 |
+
0 = car, 1 = bus, 2 = truck, 3 = motorcycle
|
| 7 |
+
|
| 8 |
+
Official submission order (remapped in MODEL_TO_OUT):
|
| 9 |
+
0 = bus, 1 = car, 2 = truck, 3 = motorcycle
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
from pathlib import Path
|
| 13 |
import math
|
| 14 |
|
|
|
|
| 17 |
import onnxruntime as ort
|
| 18 |
from numpy import ndarray
|
| 19 |
from pydantic import BaseModel
|
| 20 |
+
from ensemble_boxes import weighted_boxes_fusion
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
# -- Model class index -> submission class index --
# Keys are the class ids the ONNX model was trained with (0 = car, 1 = bus,
# 2 = truck, 3 = motorcycle); values are the ids emitted in the submission
# (0 = bus, 1 = car, 2 = truck, 3 = motorcycle). Only car and bus swap.
MODEL_TO_OUT: dict[int, int] = {0: 1, 1: 0, 2: 2, 3: 3}
# Inverse lookup: submission class id -> model class id.
OUT_TO_MODEL: dict[int, int] = {v: k for k, v in MODEL_TO_OUT.items()}
# Class names indexed by submission class id.
OUT_NAMES = ["bus", "car", "truck", "motorcycle"]

# Side length, in pixels, of the square letterboxed network input.
IMG_SIZE = 1280
# Minimum best-class score for a raw prediction to survive decoding.
CONF_THRESH = 0.55
# Stored on the Miner as `iou_threshold`.
# NOTE(review): no code visible here reads it -- confirm whether it is still needed.
IOU_THRESH = 0.45
# Passed to weighted_boxes_fusion as `iou_thr`.
WBF_IOU_THR = 0.55
# Passed to weighted_boxes_fusion as `skip_box_thr`.
WBF_SKIP_THR = 0.0001
# Upscale factor applied before the centre crop in the third TTA pass.
TTA_SCALE = 1.2
|
| 34 |
|
| 35 |
|
| 36 |
class BoundingBox(BaseModel):
|
|
|
|
| 49 |
|
| 50 |
|
| 51 |
class Miner:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
def __init__(self, path_hf_repo: Path) -> None:
    """Open the ONNX session for ``weights.onnx`` and store the thresholds.

    Args:
        path_hf_repo: Directory that contains ``weights.onnx``.
    """
    self.path_hf_repo = path_hf_repo

    weights_file = str(path_hf_repo / "weights.onnx")
    # Providers are listed CUDA first, CPU second.
    provider_order = ["CUDAExecutionProvider", "CPUExecutionProvider"]
    self.session = ort.InferenceSession(weights_file, providers=provider_order)

    # The name of the model's first input is the feed key used at inference.
    first_input = self.session.get_inputs()[0]
    self.input_name = first_input.name

    self.conf_threshold, self.iou_threshold = CONF_THRESH, IOU_THRESH
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
|
| 62 |
def __repr__(self) -> str:
|
| 63 |
+
return f"VehicleDetect Miner TTA+WBF session={type(self.session).__name__}"
|
| 64 |
+
|
| 65 |
+
def _letterbox(self, img: ndarray) -> tuple[np.ndarray, float, int, int]:
    """Fit *img* onto an IMG_SIZE x IMG_SIZE canvas, keeping its aspect ratio.

    The image is scaled by a single factor so that it fits inside the
    square, then padded with grey (114, 114, 114). Any odd padding pixel
    goes to the right/bottom.

    Args:
        img: Image array whose first two dimensions are (height, width).

    Returns:
        ``(padded_image, scale, pad_left, pad_top)``, where ``scale`` is the
        resize factor and the pads are the pixels added on the left and top.
    """
    h, w = img.shape[:2]
    r = min(IMG_SIZE / h, IMG_SIZE / w)
    # Clamp to 1 px: for very elongated images the short side can round to
    # 0, and cv2.resize rejects an empty destination size.
    new_w = max(1, int(round(w * r)))
    new_h = max(1, int(round(h * r)))
    img_r = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_LINEAR)

    dw, dh = IMG_SIZE - new_w, IMG_SIZE - new_h
    pad_l, pad_t = dw // 2, dh // 2
    img_p = cv2.copyMakeBorder(
        img_r, pad_t, dh - pad_t, pad_l, dw - pad_l,
        cv2.BORDER_CONSTANT, value=(114, 114, 114),
    )
    return img_p, r, pad_l, pad_t
|
| 77 |
+
|
| 78 |
+
def _preprocess(self, image_bgr: ndarray) -> tuple[np.ndarray, float, int, int]:
    """Convert a BGR frame into the network input tensor.

    The frame is letterboxed, converted to RGB, cast to float32, divided by
    255, and rearranged to a contiguous array with a leading batch axis and
    channels before height and width.

    Returns:
        ``(tensor, scale, pad_left, pad_top)``; the last three values are
        passed through unchanged from ``_letterbox``.
    """
    letterboxed, ratio, pad_l, pad_t = self._letterbox(image_bgr)
    rgb = cv2.cvtColor(letterboxed, cv2.COLOR_BGR2RGB)
    chw = (rgb.astype(np.float32) / 255.0).transpose(2, 0, 1)
    batch = np.ascontiguousarray(chw[np.newaxis])
    return batch, ratio, pad_l, pad_t
|
| 84 |
+
|
| 85 |
+
def _decode_raw(self, raw: np.ndarray, ratio: float, pad_l: int, pad_t: int,
|
| 86 |
+
orig_w: int, orig_h: int) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
|
| 87 |
+
"""Decode ONNX output to (boxes_xyxy, confs, cls_ids) in original image coords."""
|
| 88 |
pred = raw[0]
|
|
|
|
|
|
|
| 89 |
if pred.shape[0] < pred.shape[1]:
|
| 90 |
+
pred = pred.T
|
|
|
|
| 91 |
|
| 92 |
+
bboxes_cx = pred[:, :4]
|
| 93 |
+
cls_scores = pred[:, 4:]
|
|
|
|
| 94 |
|
| 95 |
+
cls_ids = np.argmax(cls_scores, axis=1)
|
| 96 |
+
confs = np.max(cls_scores, axis=1)
|
| 97 |
+
mask = confs >= self.conf_threshold
|
|
|
|
| 98 |
|
| 99 |
+
if not mask.any():
|
| 100 |
+
return np.empty((0, 4)), np.empty(0), np.empty(0, dtype=int)
|
|
|
|
| 101 |
|
| 102 |
+
bboxes_cx = bboxes_cx[mask]
|
| 103 |
+
confs = confs[mask]
|
| 104 |
+
cls_ids = cls_ids[mask]
|
|
|
|
| 105 |
|
| 106 |
+
cx, cy, bw, bh = bboxes_cx[:, 0], bboxes_cx[:, 1], bboxes_cx[:, 2], bboxes_cx[:, 3]
|
| 107 |
+
lx1 = cx - bw / 2
|
| 108 |
+
ly1 = cy - bh / 2
|
| 109 |
+
lx2 = cx + bw / 2
|
| 110 |
+
ly2 = cy + bh / 2
|
| 111 |
|
| 112 |
+
x1 = np.clip((lx1 - pad_l) / ratio, 0, orig_w)
|
| 113 |
+
y1 = np.clip((ly1 - pad_t) / ratio, 0, orig_h)
|
| 114 |
+
x2 = np.clip((lx2 - pad_l) / ratio, 0, orig_w)
|
| 115 |
+
y2 = np.clip((ly2 - pad_t) / ratio, 0, orig_h)
|
| 116 |
+
boxes = np.stack([x1, y1, x2, y2], axis=1)
|
| 117 |
|
| 118 |
+
return boxes, confs, cls_ids
|
|
|
|
| 119 |
|
| 120 |
+
def _run_single_pass(self, image_bgr: ndarray) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Run one forward pass on *image_bgr*.

    Returns:
        ``(boxes_xyxy, confs, cls_ids)`` as produced by ``_decode_raw``,
        with boxes expressed in the coordinates of *image_bgr*.
    """
    height, width = image_bgr.shape[:2]
    tensor, ratio, pad_l, pad_t = self._preprocess(image_bgr)
    outputs = self.session.run(None, {self.input_name: tensor})
    # Only the first model output is decoded.
    return self._decode_raw(outputs[0], ratio, pad_l, pad_t, width, height)
|
| 126 |
|
| 127 |
def _infer_single(self, image_bgr: ndarray) -> list[BoundingBox]:
    """Detect objects in one BGR frame with 3-pass TTA fused by WBF.

    Three passes are run: the original frame, a horizontally flipped frame,
    and a centre crop of the frame upscaled by ``TTA_SCALE``. Boxes from each
    pass are mapped back to original-frame pixels, remapped to submission
    class ids, normalised to [0, 1] and fused with ``weighted_boxes_fusion``.

    NOTE(review): passes that produce no detections are left out of the
    fusion inputs, so the number of lists handed to WBF varies per frame --
    confirm this is intended, since WBF's score averaging may depend on it.

    Args:
        image_bgr: Image array whose first two dimensions are (height, width).

    Returns:
        Fused detections in original-frame pixel coordinates (x1/y1 floored,
        x2/y2 ceiled, all clamped to the frame); empty when nothing is found
        or the frame has zero height or width.
    """
    orig_h, orig_w = image_bgr.shape[:2]
    if orig_h == 0 or orig_w == 0:
        # Nothing to detect; also avoids dividing by zero further down.
        return []

    passes = []

    # -- TTA pass 1: original frame --
    passes.append(self._run_single_pass(image_bgr))

    # -- TTA pass 2: horizontal flip --
    boxes_f, confs_f, cls_ids_f = self._run_single_pass(cv2.flip(image_bgr, 1))
    if len(boxes_f):
        # Mirror x back; x1 and x2 swap roles so that x1 <= x2 still holds.
        boxes_f[:, 0], boxes_f[:, 2] = orig_w - boxes_f[:, 2], orig_w - boxes_f[:, 0]
    passes.append((boxes_f, confs_f, cls_ids_f))

    # -- TTA pass 3: upscale by TTA_SCALE, centre crop to the original size --
    scaled_h, scaled_w = int(orig_h * TTA_SCALE), int(orig_w * TTA_SCALE)
    scaled = cv2.resize(image_bgr, (scaled_w, scaled_h), interpolation=cv2.INTER_LINEAR)
    y_off = (scaled_h - orig_h) // 2
    x_off = (scaled_w - orig_w) // 2
    cropped = scaled[y_off:y_off + orig_h, x_off:x_off + orig_w]
    boxes_s, confs_s, cls_ids_s = self._run_single_pass(cropped)
    if len(boxes_s):
        # Crop coordinates -> scaled-frame coordinates -> original frame.
        boxes_s[:, 0] = (boxes_s[:, 0] + x_off) / TTA_SCALE
        boxes_s[:, 1] = (boxes_s[:, 1] + y_off) / TTA_SCALE
        boxes_s[:, 2] = (boxes_s[:, 2] + x_off) / TTA_SCALE
        boxes_s[:, 3] = (boxes_s[:, 3] + y_off) / TTA_SCALE
        boxes_s = np.clip(boxes_s, 0, [[orig_w, orig_h, orig_w, orig_h]])
    passes.append((boxes_s, confs_s, cls_ids_s))

    all_boxes_list = []
    all_scores_list = []
    all_labels_list = []
    for boxes, confs, cls_ids in passes:
        if not len(boxes):
            continue
        norm_boxes, out_cls = self._to_wbf_entry(boxes, cls_ids, orig_w, orig_h)
        all_boxes_list.append(norm_boxes)
        all_scores_list.append(confs)
        all_labels_list.append(out_cls)

    if not all_boxes_list:
        return []

    # -- WBF fusion --
    fused_boxes, fused_scores, fused_labels = weighted_boxes_fusion(
        all_boxes_list, all_scores_list, all_labels_list,
        iou_thr=WBF_IOU_THR, skip_box_thr=WBF_SKIP_THR,
    )
    if len(fused_boxes) == 0:
        return []

    # Denormalise back to original-frame pixels.
    fused_boxes[:, [0, 2]] *= orig_w
    fused_boxes[:, [1, 3]] *= orig_h

    out_boxes: list[BoundingBox] = []
    for box, score, label in zip(fused_boxes, fused_scores, fused_labels):
        out_boxes.append(BoundingBox(
            # Floor the top-left and ceil the bottom-right corner, then
            # clamp to the frame.
            x1=max(0, min(orig_w, math.floor(box[0]))),
            y1=max(0, min(orig_h, math.floor(box[1]))),
            x2=max(0, min(orig_w, math.ceil(box[2]))),
            y2=max(0, min(orig_h, math.ceil(box[3]))),
            cls_id=int(label),
            conf=max(0.0, min(1.0, float(score))),
        ))
    return out_boxes

def _to_wbf_entry(self, boxes: np.ndarray, cls_ids: np.ndarray,
                  orig_w: int, orig_h: int) -> tuple[np.ndarray, np.ndarray]:
    """Return (boxes normalised to [0, 1], submission class ids) for one pass."""
    out_cls = np.array([MODEL_TO_OUT[int(c)] for c in cls_ids])
    norm_boxes = boxes.copy()  # leave the caller's pixel-space boxes intact
    norm_boxes[:, [0, 2]] /= orig_w
    norm_boxes[:, [1, 3]] /= orig_h
    return np.clip(norm_boxes, 0, 1), out_cls
|
| 215 |
|
| 216 |
def predict_batch(
|
|
|
|
| 223 |
for idx, image in enumerate(batch_images):
|
| 224 |
boxes = self._infer_single(image)
|
| 225 |
keypoints = [(0, 0) for _ in range(max(0, int(n_keypoints)))]
|
| 226 |
+
results.append(TVFrameResult(
|
| 227 |
+
frame_id=offset + idx,
|
| 228 |
+
boxes=boxes,
|
| 229 |
+
keypoints=keypoints,
|
| 230 |
+
))
|
|
|
|
|
|
|
| 231 |
return results
|
model_type.json
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
{"task_type": "object-detection", "model_type": "yolov11-
|
|
|
|
| 1 |
+
{"task_type": "object-detection", "model_type": "yolov11-small", "deploy": "2026-03-26T07:43Z"}
|
weights.onnx
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e3916408ec21f8c94358c18914f922814770b78557e52fe17ff7a9ee74339a5a
|
| 3 |
+
size 19272252
|