Zhen Ye Claude Opus 4.6 committed on
Commit
f89fa0b
·
1 Parent(s): 53922f5

refactor: rename hf_yolov8 → yolo11 across codebase

Browse files

Rename file yolov8.py → yolov11.py, class HuggingFaceYoloV8Detector →
Yolo11Detector, and registry key "hf_yolov8" → "yolo11" in all 11
files. Loads YOLO11m COCO-pretrained via hf://Ultralytics/YOLO11.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

app.py CHANGED
@@ -248,7 +248,7 @@ async def detect_endpoint(
248
  video: UploadFile = File(...),
249
  mode: str = Form(...),
250
  queries: str = Form(""),
251
- detector: str = Form("hf_yolov8"),
252
  segmenter: str = Form("GSAM2-L"),
253
  enable_depth: bool = Form(False),
254
  enable_gpt: bool = Form(True),
@@ -260,7 +260,7 @@ async def detect_endpoint(
260
  video: Video file to process
261
  mode: Detection mode (object_detection, segmentation, drone_detection)
262
  queries: Comma-separated object classes for object_detection mode
263
- detector: Model to use (hf_yolov8, detr_resnet50, grounding_dino)
264
  segmenter: Segmentation model to use (GSAM2-S/B/L, YSAM2-S/B/L)
265
  enable_depth: Whether to run legacy depth estimation (default: False)
266
  drone_detection uses the dedicated drone_yolo model.
@@ -402,7 +402,7 @@ async def detect_async_endpoint(
402
  video: UploadFile = File(...),
403
  mode: str = Form(...),
404
  queries: str = Form(""),
405
- detector: str = Form("hf_yolov8"),
406
  segmenter: str = Form("GSAM2-L"),
407
  depth_estimator: str = Form("depth"),
408
  depth_scale: float = Form(25.0),
@@ -1042,7 +1042,7 @@ async def benchmark_hardware():
1042
  async def benchmark_profile(
1043
  video: UploadFile = File(...),
1044
  mode: str = Form("detection"),
1045
- detector: str = Form("hf_yolov8"),
1046
  segmenter: str = Form("GSAM2-L"),
1047
  queries: str = Form("person,car,truck"),
1048
  max_frames: int = Form(100),
@@ -1108,7 +1108,7 @@ async def benchmark_profile(
1108
  async def benchmark_analysis(
1109
  video: UploadFile = File(...),
1110
  mode: str = Form("detection"),
1111
- detector: str = Form("hf_yolov8"),
1112
  segmenter: str = Form("GSAM2-L"),
1113
  queries: str = Form("person,car,truck"),
1114
  max_frames: int = Form(100),
 
248
  video: UploadFile = File(...),
249
  mode: str = Form(...),
250
  queries: str = Form(""),
251
+ detector: str = Form("yolo11"),
252
  segmenter: str = Form("GSAM2-L"),
253
  enable_depth: bool = Form(False),
254
  enable_gpt: bool = Form(True),
 
260
  video: Video file to process
261
  mode: Detection mode (object_detection, segmentation, drone_detection)
262
  queries: Comma-separated object classes for object_detection mode
263
+ detector: Model to use (yolo11, detr_resnet50, grounding_dino)
264
  segmenter: Segmentation model to use (GSAM2-S/B/L, YSAM2-S/B/L)
265
  enable_depth: Whether to run legacy depth estimation (default: False)
266
  drone_detection uses the dedicated drone_yolo model.
 
402
  video: UploadFile = File(...),
403
  mode: str = Form(...),
404
  queries: str = Form(""),
405
+ detector: str = Form("yolo11"),
406
  segmenter: str = Form("GSAM2-L"),
407
  depth_estimator: str = Form("depth"),
408
  depth_scale: float = Form(25.0),
 
1042
  async def benchmark_profile(
1043
  video: UploadFile = File(...),
1044
  mode: str = Form("detection"),
1045
+ detector: str = Form("yolo11"),
1046
  segmenter: str = Form("GSAM2-L"),
1047
  queries: str = Form("person,car,truck"),
1048
  max_frames: int = Form(100),
 
1108
  async def benchmark_analysis(
1109
  video: UploadFile = File(...),
1110
  mode: str = Form("detection"),
1111
+ detector: str = Form("yolo11"),
1112
  segmenter: str = Form("GSAM2-L"),
1113
  queries: str = Form("person,car,truck"),
1114
  max_frames: int = Form(100),
frontend/index.html CHANGED
@@ -70,7 +70,7 @@
70
  <label>Detector</label>
71
  <select id="detectorSelect">
72
  <optgroup label="Object Detection Models">
73
- <option value="hf_yolov8" data-kind="object" selected>Lite</option>
74
  <option value="detr_resnet50" data-kind="object">Big</option>
75
  <option value="grounding_dino" data-kind="object">Large</option>
76
  </optgroup>
 
70
  <label>Detector</label>
71
  <select id="detectorSelect">
72
  <optgroup label="Object Detection Models">
73
+ <option value="yolo11" data-kind="object" selected>Lite</option>
74
  <option value="detr_resnet50" data-kind="object">Big</option>
75
  <option value="grounding_dino" data-kind="object">Large</option>
76
  </optgroup>
frontend/js/main.js CHANGED
@@ -348,7 +348,7 @@ document.addEventListener("DOMContentLoaded", () => {
348
 
349
  try {
350
  const selectedOption = detectorSelect ? detectorSelect.options[detectorSelect.selectedIndex] : null;
351
- const selectedValue = detectorSelect ? detectorSelect.value : "hf_yolov8";
352
  const kind = selectedOption ? selectedOption.getAttribute("data-kind") : "object";
353
  const queries = missionText ? missionText.value.trim() : "";
354
  const enableGPT = $("#enableGPTToggle")?.checked || false;
@@ -359,7 +359,7 @@ document.addEventListener("DOMContentLoaded", () => {
359
  if (kind === "segmentation") {
360
  mode = "segmentation";
361
  segmenterParam = selectedValue;
362
- detectorParam = "hf_yolov8"; // default, unused for segmentation
363
  } else if (kind === "drone") {
364
  mode = "drone_detection";
365
  detectorParam = selectedValue;
 
348
 
349
  try {
350
  const selectedOption = detectorSelect ? detectorSelect.options[detectorSelect.selectedIndex] : null;
351
+ const selectedValue = detectorSelect ? detectorSelect.value : "yolo11";
352
  const kind = selectedOption ? selectedOption.getAttribute("data-kind") : "object";
353
  const queries = missionText ? missionText.value.trim() : "";
354
  const enableGPT = $("#enableGPTToggle")?.checked || false;
 
359
  if (kind === "segmentation") {
360
  mode = "segmentation";
361
  segmenterParam = selectedValue;
362
+ detectorParam = "yolo11"; // default, unused for segmentation
363
  } else if (kind === "drone") {
364
  mode = "drone_detection";
365
  detectorParam = selectedValue;
inference.py CHANGED
@@ -717,7 +717,7 @@ def run_inference(
717
  logging.info("No queries provided, using defaults: %s", queries)
718
 
719
  logging.info("Detection queries: %s", queries)
720
- active_detector = detector_name or "hf_yolov8"
721
 
722
  # Parallel Model Loading
723
  num_gpus = torch.cuda.device_count()
 
717
  logging.info("No queries provided, using defaults: %s", queries)
718
 
719
  logging.info("Detection queries: %s", queries)
720
+ active_detector = detector_name or "yolo11"
721
 
722
  # Parallel Model Loading
723
  num_gpus = torch.cuda.device_count()
models/detectors/{yolov8.py → yolov11.py} RENAMED
@@ -9,14 +9,14 @@ from models.detectors.base import DetectionResult, ObjectDetector
9
  from utils.tiling import get_slice_bboxes, slice_image, shift_bboxes, batched_nms
10
 
11
 
12
- class HuggingFaceYoloV8Detector(ObjectDetector):
13
  """YOLO11m detector with COCO-pretrained weights from Ultralytics."""
14
 
15
  supports_batch = True
16
  max_batch_size = 32
17
 
18
  def __init__(self, score_threshold: float = 0.3, device: str = None) -> None:
19
- self.name = "hf_yolov8"
20
  self.score_threshold = score_threshold
21
  # CRITICAL: Store device as torch.device, NOT a string.
22
  # Ultralytics' select_device() sets CUDA_VISIBLE_DEVICES when it
@@ -31,7 +31,7 @@ class HuggingFaceYoloV8Detector(ObjectDetector):
31
  "Loading YOLO11m COCO-pretrained weights onto %s",
32
  self.device,
33
  )
34
- self.model = YOLO("yolo11m.pt")
35
  self.model.to(self.device)
36
  self.class_names = self.model.names
37
 
 
9
  from utils.tiling import get_slice_bboxes, slice_image, shift_bboxes, batched_nms
10
 
11
 
12
+ class Yolo11Detector(ObjectDetector):
13
  """YOLO11m detector with COCO-pretrained weights from Ultralytics."""
14
 
15
  supports_batch = True
16
  max_batch_size = 32
17
 
18
  def __init__(self, score_threshold: float = 0.3, device: str = None) -> None:
19
+ self.name = "yolo11"
20
  self.score_threshold = score_threshold
21
  # CRITICAL: Store device as torch.device, NOT a string.
22
  # Ultralytics' select_device() sets CUDA_VISIBLE_DEVICES when it
 
31
  "Loading YOLO11m COCO-pretrained weights onto %s",
32
  self.device,
33
  )
34
+ self.model = YOLO("hf://Ultralytics/YOLO11")
35
  self.model.to(self.device)
36
  self.class_names = self.model.names
37
 
models/model_loader.py CHANGED
@@ -6,13 +6,13 @@ from models.detectors.base import ObjectDetector
6
  from models.detectors.detr import DetrDetector
7
  from models.detectors.drone_yolo import DroneYoloDetector
8
  from models.detectors.grounding_dino import GroundingDinoDetector
9
- from models.detectors.yolov8 import HuggingFaceYoloV8Detector
10
 
11
 
12
- DEFAULT_DETECTOR = "hf_yolov8"
13
 
14
  _REGISTRY: Dict[str, Callable[[], ObjectDetector]] = {
15
- "hf_yolov8": HuggingFaceYoloV8Detector,
16
  "detr_resnet50": DetrDetector,
17
  "grounding_dino": GroundingDinoDetector,
18
  "drone_yolo": DroneYoloDetector,
 
6
  from models.detectors.detr import DetrDetector
7
  from models.detectors.drone_yolo import DroneYoloDetector
8
  from models.detectors.grounding_dino import GroundingDinoDetector
9
+ from models.detectors.yolov11 import Yolo11Detector
10
 
11
 
12
+ DEFAULT_DETECTOR = "yolo11"
13
 
14
  _REGISTRY: Dict[str, Callable[[], ObjectDetector]] = {
15
+ "yolo11": Yolo11Detector,
16
  "detr_resnet50": DetrDetector,
17
  "grounding_dino": GroundingDinoDetector,
18
  "drone_yolo": DroneYoloDetector,
models/segmenters/grounded_sam2.py CHANGED
@@ -349,7 +349,7 @@ class GroundedSAM2Segmenter(Segmenter):
349
  self.num_maskmem = num_maskmem # None = use default (7)
350
  self._detector_name = detector_name # None = "grounding_dino"
351
  _size_suffix = {"small": "S", "base": "B", "large": "L"}
352
- _det_prefix = {"hf_yolov8": "YSAM2"}
353
  _prefix = _det_prefix.get(detector_name, "GSAM2")
354
  self.name = f"{_prefix}-{_size_suffix[model_size]}"
355
 
 
349
  self.num_maskmem = num_maskmem # None = use default (7)
350
  self._detector_name = detector_name # None = "grounding_dino"
351
  _size_suffix = {"small": "S", "base": "B", "large": "L"}
352
+ _det_prefix = {"yolo11": "YSAM2"}
353
  _prefix = _det_prefix.get(detector_name, "GSAM2")
354
  self.name = f"{_prefix}-{_size_suffix[model_size]}"
355
 
models/segmenters/model_loader.py CHANGED
@@ -12,9 +12,9 @@ _SEGMENTER_SPECS: Dict[str, Tuple[str, Optional[str]]] = {
12
  "GSAM2-S": ("small", None),
13
  "GSAM2-B": ("base", None),
14
  "GSAM2-L": ("large", None),
15
- "YSAM2-S": ("small", "hf_yolov8"),
16
- "YSAM2-B": ("base", "hf_yolov8"),
17
- "YSAM2-L": ("large", "hf_yolov8"),
18
  }
19
 
20
 
 
12
  "GSAM2-S": ("small", None),
13
  "GSAM2-B": ("base", None),
14
  "GSAM2-L": ("large", None),
15
+ "YSAM2-S": ("small", "yolo11"),
16
+ "YSAM2-B": ("base", "yolo11"),
17
+ "YSAM2-L": ("large", "yolo11"),
18
  }
19
 
20
 
utils/mission_parser.py CHANGED
@@ -25,7 +25,7 @@ from utils.schemas import MissionSpecification, RelevanceCriteria
25
  logger = logging.getLogger(__name__)
26
 
27
  # Detectors that only support COCO class vocabulary
28
- _COCO_ONLY_DETECTORS = frozenset({"hf_yolov8", "detr_resnet50"})
29
 
30
 
31
  class MissionParseError(ValueError):
 
25
  logger = logging.getLogger(__name__)
26
 
27
  # Detectors that only support COCO class vocabulary
28
+ _COCO_ONLY_DETECTORS = frozenset({"yolo11", "detr_resnet50"})
29
 
30
 
31
  class MissionParseError(ValueError):
utils/profiler.py CHANGED
@@ -20,7 +20,7 @@ logger = logging.getLogger(__name__)
20
  # Detectors whose predict() can be decomposed into processor -> model -> post_process
21
  _DECOMPOSABLE_DETECTORS = {"detr_resnet50", "grounding_dino"}
22
  # Detectors with opaque predict() calls (YOLO-based)
23
- _OPAQUE_DETECTORS = {"hf_yolov8", "drone_yolo"}
24
 
25
 
26
  @dataclass
 
20
  # Detectors whose predict() can be decomposed into processor -> model -> post_process
21
  _DECOMPOSABLE_DETECTORS = {"detr_resnet50", "grounding_dino"}
22
  # Detectors with opaque predict() calls (YOLO-based)
23
+ _OPAQUE_DETECTORS = {"yolo11", "drone_yolo"}
24
 
25
 
26
  @dataclass
utils/roofline.py CHANGED
@@ -15,7 +15,7 @@ logger = logging.getLogger(__name__)
15
  # These are rough estimates; actual FLOPs depend on input resolution and model variant.
16
  _MODEL_FLOPS: Dict[str, float] = {
17
  # Detection models (GFLOPs per frame)
18
- "hf_yolov8": 78.9, # YOLOv8m ~79 GFLOPs at 640px
19
  "detr_resnet50": 86.0, # DETR-R50 ~86 GFLOPs at 800px
20
  "grounding_dino": 172.0, # Grounding DINO-B ~172 GFLOPs
21
  "drone_yolo": 78.9, # Same arch as YOLOv8m
@@ -34,7 +34,7 @@ _MODEL_FLOPS: Dict[str, float] = {
34
  # Approximate bytes moved per forward pass (weights + activations + I/O)
35
  _MODEL_BYTES: Dict[str, float] = {
36
  # In MB — approximate weight size + activation memory
37
- "hf_yolov8": 52.0,
38
  "detr_resnet50": 166.0,
39
  "grounding_dino": 340.0,
40
  "drone_yolo": 52.0,
 
15
  # These are rough estimates; actual FLOPs depend on input resolution and model variant.
16
  _MODEL_FLOPS: Dict[str, float] = {
17
  # Detection models (GFLOPs per frame)
18
+ "yolo11": 78.9, # YOLOv8m ~79 GFLOPs at 640px
19
  "detr_resnet50": 86.0, # DETR-R50 ~86 GFLOPs at 800px
20
  "grounding_dino": 172.0, # Grounding DINO-B ~172 GFLOPs
21
  "drone_yolo": 78.9, # Same arch as YOLOv8m
 
34
  # Approximate bytes moved per forward pass (weights + activations + I/O)
35
  _MODEL_BYTES: Dict[str, float] = {
36
  # In MB — approximate weight size + activation memory
37
+ "yolo11": 52.0,
38
  "detr_resnet50": 166.0,
39
  "grounding_dino": 340.0,
40
  "drone_yolo": 52.0,