Spaces:

BiasLab2025
/

perception

Sleeping

Zhen Ye committed 18 days ago

Commit

a5f8d15

1 Parent(s): 356dce8

perf: Tune batch sizes and queues for A10 GPUs

- Increased YOLO (HF/Drone) batch size to 32
- Increased Depth Anything batch size to 16
- Increased SAM3 batch size to 8
- Increased DETR batch size to 8
- Expanded inference.py queue_out to handle 32-frame batch bursts

Files changed (6)

inference.py +2 -2
models/depth_estimators/depth_anything_v2.py +1 -1
models/detectors/detr.py +1 -1
models/detectors/drone_yolo.py +1 -1
models/detectors/yolov8.py +1 -1
models/segmenters/sam3.py +1 -1

inference.py CHANGED Viewed

@@ -724,11 +724,11 @@ def run_inference(
         except Exception as e:
              logging.warning("Pre-scan failed, using default range: %s", e)
-    # 5. Processing Queues
     # queue_in: (frame_idx, frame_data)
     # queue_out: (frame_idx, processed_frame, detections)
     queue_in = Queue(maxsize=16)
-    queue_out_max = max(32, (len(detectors) if detectors else 1) * 4)
     queue_out = Queue(maxsize=queue_out_max)
     # 6. Worker Function (Unified)

         except Exception as e:
              logging.warning("Pre-scan failed, using default range: %s", e)
     # queue_in: (frame_idx, frame_data)
     # queue_out: (frame_idx, processed_frame, detections)
     queue_in = Queue(maxsize=16)
+    # Tuning for A10: buffer at least 32 frames per GPU (batch size)
+    queue_out_max = max(64, (len(detectors) if detectors else 1) * 32)
     queue_out = Queue(maxsize=queue_out_max)
     # 6. Worker Function (Unified)

models/depth_estimators/depth_anything_v2.py CHANGED Viewed

@@ -14,7 +14,7 @@ class DepthAnythingV2Estimator(DepthEstimator):
     name = "depth"
     supports_batch = True
-    max_batch_size = 4
     def _resize_depth(self, raw_depth, height, width):
         if raw_depth.dim() == 2:

     name = "depth"
     supports_batch = True
+    max_batch_size = 16
     def _resize_depth(self, raw_depth, height, width):
         if raw_depth.dim() == 2:

models/detectors/detr.py CHANGED Viewed

@@ -27,7 +27,7 @@ class DetrDetector(ObjectDetector):
         self.model.eval()
     supports_batch = True
-    max_batch_size = 4
     def _parse_single_result(self, processed) -> DetectionResult:
         boxes = processed["boxes"].cpu().numpy()

         self.model.eval()
     supports_batch = True
+    max_batch_size = 8
     def _parse_single_result(self, processed) -> DetectionResult:
         boxes = processed["boxes"].cpu().numpy()

models/detectors/drone_yolo.py CHANGED Viewed

@@ -14,7 +14,7 @@ class DroneYoloDetector(ObjectDetector):
     REPO_ID = "rujutashashikanjoshi/yolo12-drone-detection-0205-100m"
     supports_batch = True
-    max_batch_size = 8
     def __init__(self, score_threshold: float = 0.3, device: str = None) -> None:
         self.name = "drone_yolo"

     REPO_ID = "rujutashashikanjoshi/yolo12-drone-detection-0205-100m"
     supports_batch = True
+    max_batch_size = 32
     def __init__(self, score_threshold: float = 0.3, device: str = None) -> None:
         self.name = "drone_yolo"

models/detectors/yolov8.py CHANGED Viewed

@@ -15,7 +15,7 @@ class HuggingFaceYoloV8Detector(ObjectDetector):
     REPO_ID = "spencercdz/YOLOv8m_defence"
     WEIGHT_FILE = "yolov8m_defence.pt"
     supports_batch = True
-    max_batch_size = 8
     def __init__(self, score_threshold: float = 0.3, device: str = None) -> None:
         self.name = "hf_yolov8"

     REPO_ID = "spencercdz/YOLOv8m_defence"
     WEIGHT_FILE = "yolov8m_defence.pt"
     supports_batch = True
+    max_batch_size = 32
     def __init__(self, score_threshold: float = 0.3, device: str = None) -> None:
         self.name = "hf_yolov8"

models/segmenters/sam3.py CHANGED Viewed

@@ -56,7 +56,7 @@ class SAM3Segmenter(Segmenter):
         logging.info("SAM3 model loaded successfully")
     supports_batch = True
-    max_batch_size = 4
     def _parse_single_result(self, results, frame_shape) -> SegmentationResult:
         # Extract results

         logging.info("SAM3 model loaded successfully")
     supports_batch = True
+    max_batch_size = 8
     def _parse_single_result(self, results, frame_shape) -> SegmentationResult:
         # Extract results