Zhen Ye committed on
Commit
a5f8d15
·
1 Parent(s): 356dce8

perf: Tune batch sizes and queues for A10 GPUs

Browse files

- Increased YOLO (HF/Drone) batch size to 32
- Increased Depth Anything batch size to 16
- Increased SAM3 batch size to 8
- Increased DETR batch size to 8
- Expanded inference.py queue_out to handle 32-frame batch bursts

inference.py CHANGED
@@ -724,11 +724,11 @@ def run_inference(
724
  except Exception as e:
725
  logging.warning("Pre-scan failed, using default range: %s", e)
726
 
727
- # 5. Processing Queues
728
  # queue_in: (frame_idx, frame_data)
729
  # queue_out: (frame_idx, processed_frame, detections)
730
  queue_in = Queue(maxsize=16)
731
- queue_out_max = max(32, (len(detectors) if detectors else 1) * 4)
 
732
  queue_out = Queue(maxsize=queue_out_max)
733
 
734
  # 6. Worker Function (Unified)
 
724
  except Exception as e:
725
  logging.warning("Pre-scan failed, using default range: %s", e)
726
 
 
727
  # queue_in: (frame_idx, frame_data)
728
  # queue_out: (frame_idx, processed_frame, detections)
729
  queue_in = Queue(maxsize=16)
730
+ # Tuning for A10: buffer at least 32 frames per GPU (batch size)
731
+ queue_out_max = max(64, (len(detectors) if detectors else 1) * 32)
732
  queue_out = Queue(maxsize=queue_out_max)
733
 
734
  # 6. Worker Function (Unified)
models/depth_estimators/depth_anything_v2.py CHANGED
@@ -14,7 +14,7 @@ class DepthAnythingV2Estimator(DepthEstimator):
14
 
15
  name = "depth"
16
  supports_batch = True
17
- max_batch_size = 4
18
 
19
  def _resize_depth(self, raw_depth, height, width):
20
  if raw_depth.dim() == 2:
 
14
 
15
  name = "depth"
16
  supports_batch = True
17
+ max_batch_size = 16
18
 
19
  def _resize_depth(self, raw_depth, height, width):
20
  if raw_depth.dim() == 2:
models/detectors/detr.py CHANGED
@@ -27,7 +27,7 @@ class DetrDetector(ObjectDetector):
27
  self.model.eval()
28
 
29
  supports_batch = True
30
- max_batch_size = 4
31
 
32
  def _parse_single_result(self, processed) -> DetectionResult:
33
  boxes = processed["boxes"].cpu().numpy()
 
27
  self.model.eval()
28
 
29
  supports_batch = True
30
+ max_batch_size = 8
31
 
32
  def _parse_single_result(self, processed) -> DetectionResult:
33
  boxes = processed["boxes"].cpu().numpy()
models/detectors/drone_yolo.py CHANGED
@@ -14,7 +14,7 @@ class DroneYoloDetector(ObjectDetector):
14
 
15
  REPO_ID = "rujutashashikanjoshi/yolo12-drone-detection-0205-100m"
16
  supports_batch = True
17
- max_batch_size = 8
18
 
19
  def __init__(self, score_threshold: float = 0.3, device: str = None) -> None:
20
  self.name = "drone_yolo"
 
14
 
15
  REPO_ID = "rujutashashikanjoshi/yolo12-drone-detection-0205-100m"
16
  supports_batch = True
17
+ max_batch_size = 32
18
 
19
  def __init__(self, score_threshold: float = 0.3, device: str = None) -> None:
20
  self.name = "drone_yolo"
models/detectors/yolov8.py CHANGED
@@ -15,7 +15,7 @@ class HuggingFaceYoloV8Detector(ObjectDetector):
15
  REPO_ID = "spencercdz/YOLOv8m_defence"
16
  WEIGHT_FILE = "yolov8m_defence.pt"
17
  supports_batch = True
18
- max_batch_size = 8
19
 
20
  def __init__(self, score_threshold: float = 0.3, device: str = None) -> None:
21
  self.name = "hf_yolov8"
 
15
  REPO_ID = "spencercdz/YOLOv8m_defence"
16
  WEIGHT_FILE = "yolov8m_defence.pt"
17
  supports_batch = True
18
+ max_batch_size = 32
19
 
20
  def __init__(self, score_threshold: float = 0.3, device: str = None) -> None:
21
  self.name = "hf_yolov8"
models/segmenters/sam3.py CHANGED
@@ -56,7 +56,7 @@ class SAM3Segmenter(Segmenter):
56
  logging.info("SAM3 model loaded successfully")
57
 
58
  supports_batch = True
59
- max_batch_size = 4
60
 
61
  def _parse_single_result(self, results, frame_shape) -> SegmentationResult:
62
  # Extract results
 
56
  logging.info("SAM3 model loaded successfully")
57
 
58
  supports_batch = True
59
+ max_batch_size = 8
60
 
61
  def _parse_single_result(self, results, frame_shape) -> SegmentationResult:
62
  # Extract results