Spaces:

BiasLab2025
/

detection_base

Paused

Zhen Ye Claude Opus 4.6 committed on Feb 24

Commit

f89fa0b

1 Parent(s): 53922f5

refactor: rename hf_yolov8 → yolo11 across codebase

Rename file yolov8.py → yolov11.py, class HuggingFaceYoloV8Detector →
Yolo11Detector, and registry key "hf_yolov8" → "yolo11" in all 11
files. Loads YOLO11m COCO-pretrained via hf://Ultralytics/YOLO11.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (11) hide show

app.py +5 -5
frontend/index.html +1 -1
frontend/js/main.js +2 -2
inference.py +1 -1
models/detectors/{yolov8.py → yolov11.py} +3 -3
models/model_loader.py +3 -3
models/segmenters/grounded_sam2.py +1 -1
models/segmenters/model_loader.py +3 -3
utils/mission_parser.py +1 -1
utils/profiler.py +1 -1
utils/roofline.py +2 -2

app.py CHANGED Viewed

@@ -248,7 +248,7 @@ async def detect_endpoint(
     video: UploadFile = File(...),
     mode: str = Form(...),
     queries: str = Form(""),
-    detector: str = Form("hf_yolov8"),
     segmenter: str = Form("GSAM2-L"),
     enable_depth: bool = Form(False),
     enable_gpt: bool = Form(True),
@@ -260,7 +260,7 @@ async def detect_endpoint(
         video: Video file to process
         mode: Detection mode (object_detection, segmentation, drone_detection)
         queries: Comma-separated object classes for object_detection mode
-        detector: Model to use (hf_yolov8, detr_resnet50, grounding_dino)
         segmenter: Segmentation model to use (GSAM2-S/B/L, YSAM2-S/B/L)
         enable_depth: Whether to run legacy depth estimation (default: False)
         drone_detection uses the dedicated drone_yolo model.
@@ -402,7 +402,7 @@ async def detect_async_endpoint(
     video: UploadFile = File(...),
     mode: str = Form(...),
     queries: str = Form(""),
-    detector: str = Form("hf_yolov8"),
     segmenter: str = Form("GSAM2-L"),
     depth_estimator: str = Form("depth"),
     depth_scale: float = Form(25.0),
@@ -1042,7 +1042,7 @@ async def benchmark_hardware():
 async def benchmark_profile(
     video: UploadFile = File(...),
     mode: str = Form("detection"),
-    detector: str = Form("hf_yolov8"),
     segmenter: str = Form("GSAM2-L"),
     queries: str = Form("person,car,truck"),
     max_frames: int = Form(100),
@@ -1108,7 +1108,7 @@ async def benchmark_profile(
 async def benchmark_analysis(
     video: UploadFile = File(...),
     mode: str = Form("detection"),
-    detector: str = Form("hf_yolov8"),
     segmenter: str = Form("GSAM2-L"),
     queries: str = Form("person,car,truck"),
     max_frames: int = Form(100),

     video: UploadFile = File(...),
     mode: str = Form(...),
     queries: str = Form(""),
+    detector: str = Form("yolo11"),
     segmenter: str = Form("GSAM2-L"),
     enable_depth: bool = Form(False),
     enable_gpt: bool = Form(True),
         video: Video file to process
         mode: Detection mode (object_detection, segmentation, drone_detection)
         queries: Comma-separated object classes for object_detection mode
+        detector: Model to use (yolo11, detr_resnet50, grounding_dino)
         segmenter: Segmentation model to use (GSAM2-S/B/L, YSAM2-S/B/L)
         enable_depth: Whether to run legacy depth estimation (default: False)
         drone_detection uses the dedicated drone_yolo model.
     video: UploadFile = File(...),
     mode: str = Form(...),
     queries: str = Form(""),
+    detector: str = Form("yolo11"),
     segmenter: str = Form("GSAM2-L"),
     depth_estimator: str = Form("depth"),
     depth_scale: float = Form(25.0),
 async def benchmark_profile(
     video: UploadFile = File(...),
     mode: str = Form("detection"),
+    detector: str = Form("yolo11"),
     segmenter: str = Form("GSAM2-L"),
     queries: str = Form("person,car,truck"),
     max_frames: int = Form(100),
 async def benchmark_analysis(
     video: UploadFile = File(...),
     mode: str = Form("detection"),
+    detector: str = Form("yolo11"),
     segmenter: str = Form("GSAM2-L"),
     queries: str = Form("person,car,truck"),
     max_frames: int = Form(100),

frontend/index.html CHANGED Viewed

@@ -70,7 +70,7 @@
               <label>Detector</label>
               <select id="detectorSelect">
                 <optgroup label="Object Detection Models">
-                  <option value="hf_yolov8" data-kind="object" selected>Lite</option>
                   <option value="detr_resnet50" data-kind="object">Big</option>
                   <option value="grounding_dino" data-kind="object">Large</option>
                 </optgroup>

               <label>Detector</label>
               <select id="detectorSelect">
                 <optgroup label="Object Detection Models">
+                  <option value="yolo11" data-kind="object" selected>Lite</option>
                   <option value="detr_resnet50" data-kind="object">Big</option>
                   <option value="grounding_dino" data-kind="object">Large</option>
                 </optgroup>

frontend/js/main.js CHANGED Viewed

@@ -348,7 +348,7 @@ document.addEventListener("DOMContentLoaded", () => {
         try {
             const selectedOption = detectorSelect ? detectorSelect.options[detectorSelect.selectedIndex] : null;
-            const selectedValue = detectorSelect ? detectorSelect.value : "hf_yolov8";
             const kind = selectedOption ? selectedOption.getAttribute("data-kind") : "object";
             const queries = missionText ? missionText.value.trim() : "";
             const enableGPT = $("#enableGPTToggle")?.checked || false;
@@ -359,7 +359,7 @@ document.addEventListener("DOMContentLoaded", () => {
             if (kind === "segmentation") {
                 mode = "segmentation";
                 segmenterParam = selectedValue;
-                detectorParam = "hf_yolov8"; // default, unused for segmentation
             } else if (kind === "drone") {
                 mode = "drone_detection";
                 detectorParam = selectedValue;

         try {
             const selectedOption = detectorSelect ? detectorSelect.options[detectorSelect.selectedIndex] : null;
+            const selectedValue = detectorSelect ? detectorSelect.value : "yolo11";
             const kind = selectedOption ? selectedOption.getAttribute("data-kind") : "object";
             const queries = missionText ? missionText.value.trim() : "";
             const enableGPT = $("#enableGPTToggle")?.checked || false;
             if (kind === "segmentation") {
                 mode = "segmentation";
                 segmenterParam = selectedValue;
+                detectorParam = "yolo11"; // default, unused for segmentation
             } else if (kind === "drone") {
                 mode = "drone_detection";
                 detectorParam = selectedValue;

inference.py CHANGED Viewed

@@ -717,7 +717,7 @@ def run_inference(
         logging.info("No queries provided, using defaults: %s", queries)
     logging.info("Detection queries: %s", queries)
-    active_detector = detector_name or "hf_yolov8"
     # Parallel Model Loading
     num_gpus = torch.cuda.device_count()

         logging.info("No queries provided, using defaults: %s", queries)
     logging.info("Detection queries: %s", queries)
+    active_detector = detector_name or "yolo11"
     # Parallel Model Loading
     num_gpus = torch.cuda.device_count()

models/detectors/{yolov8.py → yolov11.py} RENAMED Viewed

@@ -9,14 +9,14 @@ from models.detectors.base import DetectionResult, ObjectDetector
 from utils.tiling import get_slice_bboxes, slice_image, shift_bboxes, batched_nms
-class HuggingFaceYoloV8Detector(ObjectDetector):
     """YOLO11m detector with COCO-pretrained weights from Ultralytics."""
     supports_batch = True
     max_batch_size = 32
     def __init__(self, score_threshold: float = 0.3, device: str = None) -> None:
-        self.name = "hf_yolov8"
         self.score_threshold = score_threshold
         # CRITICAL: Store device as torch.device, NOT a string.
         # Ultralytics' select_device() sets CUDA_VISIBLE_DEVICES when it
@@ -31,7 +31,7 @@ class HuggingFaceYoloV8Detector(ObjectDetector):
             "Loading YOLO11m COCO-pretrained weights onto %s",
             self.device,
         )
-        self.model = YOLO("yolo11m.pt")
         self.model.to(self.device)
         self.class_names = self.model.names

 from utils.tiling import get_slice_bboxes, slice_image, shift_bboxes, batched_nms
+class Yolo11Detector(ObjectDetector):
     """YOLO11m detector with COCO-pretrained weights from Ultralytics."""
     supports_batch = True
     max_batch_size = 32
     def __init__(self, score_threshold: float = 0.3, device: str = None) -> None:
+        self.name = "yolo11"
         self.score_threshold = score_threshold
         # CRITICAL: Store device as torch.device, NOT a string.
         # Ultralytics' select_device() sets CUDA_VISIBLE_DEVICES when it
             "Loading YOLO11m COCO-pretrained weights onto %s",
             self.device,
         )
+        self.model = YOLO("hf://Ultralytics/YOLO11")
         self.model.to(self.device)
         self.class_names = self.model.names

models/model_loader.py CHANGED Viewed

@@ -6,13 +6,13 @@ from models.detectors.base import ObjectDetector
 from models.detectors.detr import DetrDetector
 from models.detectors.drone_yolo import DroneYoloDetector
 from models.detectors.grounding_dino import GroundingDinoDetector
-from models.detectors.yolov8 import HuggingFaceYoloV8Detector
-DEFAULT_DETECTOR = "hf_yolov8"
 _REGISTRY: Dict[str, Callable[[], ObjectDetector]] = {
-    "hf_yolov8": HuggingFaceYoloV8Detector,
     "detr_resnet50": DetrDetector,
     "grounding_dino": GroundingDinoDetector,
     "drone_yolo": DroneYoloDetector,

 from models.detectors.detr import DetrDetector
 from models.detectors.drone_yolo import DroneYoloDetector
 from models.detectors.grounding_dino import GroundingDinoDetector
+from models.detectors.yolov11 import Yolo11Detector
+DEFAULT_DETECTOR = "yolo11"
 _REGISTRY: Dict[str, Callable[[], ObjectDetector]] = {
+    "yolo11": Yolo11Detector,
     "detr_resnet50": DetrDetector,
     "grounding_dino": GroundingDinoDetector,
     "drone_yolo": DroneYoloDetector,

models/segmenters/grounded_sam2.py CHANGED Viewed

@@ -349,7 +349,7 @@ class GroundedSAM2Segmenter(Segmenter):
         self.num_maskmem = num_maskmem  # None = use default (7)
         self._detector_name = detector_name  # None = "grounding_dino"
         _size_suffix = {"small": "S", "base": "B", "large": "L"}
-        _det_prefix = {"hf_yolov8": "YSAM2"}
         _prefix = _det_prefix.get(detector_name, "GSAM2")
         self.name = f"{_prefix}-{_size_suffix[model_size]}"

         self.num_maskmem = num_maskmem  # None = use default (7)
         self._detector_name = detector_name  # None = "grounding_dino"
         _size_suffix = {"small": "S", "base": "B", "large": "L"}
+        _det_prefix = {"yolo11": "YSAM2"}
         _prefix = _det_prefix.get(detector_name, "GSAM2")
         self.name = f"{_prefix}-{_size_suffix[model_size]}"

models/segmenters/model_loader.py CHANGED Viewed

@@ -12,9 +12,9 @@ _SEGMENTER_SPECS: Dict[str, Tuple[str, Optional[str]]] = {
     "GSAM2-S": ("small", None),
     "GSAM2-B": ("base", None),
     "GSAM2-L": ("large", None),
-    "YSAM2-S": ("small", "hf_yolov8"),
-    "YSAM2-B": ("base", "hf_yolov8"),
-    "YSAM2-L": ("large", "hf_yolov8"),
 }

     "GSAM2-S": ("small", None),
     "GSAM2-B": ("base", None),
     "GSAM2-L": ("large", None),
+    "YSAM2-S": ("small", "yolo11"),
+    "YSAM2-B": ("base", "yolo11"),
+    "YSAM2-L": ("large", "yolo11"),
 }

utils/mission_parser.py CHANGED Viewed

@@ -25,7 +25,7 @@ from utils.schemas import MissionSpecification, RelevanceCriteria
 logger = logging.getLogger(__name__)
 # Detectors that only support COCO class vocabulary
-_COCO_ONLY_DETECTORS = frozenset({"hf_yolov8", "detr_resnet50"})
 class MissionParseError(ValueError):

 logger = logging.getLogger(__name__)
 # Detectors that only support COCO class vocabulary
+_COCO_ONLY_DETECTORS = frozenset({"yolo11", "detr_resnet50"})
 class MissionParseError(ValueError):

utils/profiler.py CHANGED Viewed

@@ -20,7 +20,7 @@ logger = logging.getLogger(__name__)
 # Detectors whose predict() can be decomposed into processor -> model -> post_process
 _DECOMPOSABLE_DETECTORS = {"detr_resnet50", "grounding_dino"}
 # Detectors with opaque predict() calls (YOLO-based)
-_OPAQUE_DETECTORS = {"hf_yolov8", "drone_yolo"}
 @dataclass

 # Detectors whose predict() can be decomposed into processor -> model -> post_process
 _DECOMPOSABLE_DETECTORS = {"detr_resnet50", "grounding_dino"}
 # Detectors with opaque predict() calls (YOLO-based)
+_OPAQUE_DETECTORS = {"yolo11", "drone_yolo"}
 @dataclass

utils/roofline.py CHANGED Viewed

@@ -15,7 +15,7 @@ logger = logging.getLogger(__name__)
 # These are rough estimates; actual FLOPs depend on input resolution and model variant.
 _MODEL_FLOPS: Dict[str, float] = {
     # Detection models (GFLOPs per frame)
-    "hf_yolov8": 78.9,           # YOLOv8m ~79 GFLOPs at 640px
     "detr_resnet50": 86.0,       # DETR-R50 ~86 GFLOPs at 800px
     "grounding_dino": 172.0,     # Grounding DINO-B ~172 GFLOPs
     "drone_yolo": 78.9,          # Same arch as YOLOv8m
@@ -34,7 +34,7 @@ _MODEL_FLOPS: Dict[str, float] = {
 # Approximate bytes moved per forward pass (weights + activations + I/O)
 _MODEL_BYTES: Dict[str, float] = {
     # In MB — approximate weight size + activation memory
-    "hf_yolov8": 52.0,
     "detr_resnet50": 166.0,
     "grounding_dino": 340.0,
     "drone_yolo": 52.0,

 # These are rough estimates; actual FLOPs depend on input resolution and model variant.
 _MODEL_FLOPS: Dict[str, float] = {
     # Detection models (GFLOPs per frame)
+    "yolo11": 78.9,           # YOLOv8m ~79 GFLOPs at 640px
     "detr_resnet50": 86.0,       # DETR-R50 ~86 GFLOPs at 800px
     "grounding_dino": 172.0,     # Grounding DINO-B ~172 GFLOPs
     "drone_yolo": 78.9,          # Same arch as YOLOv8m
 # Approximate bytes moved per forward pass (weights + activations + I/O)
 _MODEL_BYTES: Dict[str, float] = {
     # In MB — approximate weight size + activation memory
+    "yolo11": 52.0,
     "detr_resnet50": 166.0,
     "grounding_dino": 340.0,
     "drone_yolo": 52.0,