Spaces:
Sleeping
Sleeping
Zhen Ye
committed on
Commit
·
a5f8d15
1
Parent(s):
356dce8
perf: Tune batch sizes and queues for A10 GPUs
Browse files
- Increased YOLO (HF/Drone) batch size to 32
- Increased Depth Anything batch size to 16
- Increased SAM3 batch size to 8
- Increased DETR batch size to 8
- Expanded inference.py queue_out to handle 32-frame batch bursts
- inference.py +2 -2
- models/depth_estimators/depth_anything_v2.py +1 -1
- models/detectors/detr.py +1 -1
- models/detectors/drone_yolo.py +1 -1
- models/detectors/yolov8.py +1 -1
- models/segmenters/sam3.py +1 -1
inference.py
CHANGED
|
@@ -724,11 +724,11 @@ def run_inference(
|
|
| 724 |
except Exception as e:
|
| 725 |
logging.warning("Pre-scan failed, using default range: %s", e)
|
| 726 |
|
| 727 |
-
# 5. Processing Queues
|
| 728 |
# queue_in: (frame_idx, frame_data)
|
| 729 |
# queue_out: (frame_idx, processed_frame, detections)
|
| 730 |
queue_in = Queue(maxsize=16)
|
| 731 |
-
|
|
|
|
| 732 |
queue_out = Queue(maxsize=queue_out_max)
|
| 733 |
|
| 734 |
# 6. Worker Function (Unified)
|
|
|
|
| 724 |
except Exception as e:
|
| 725 |
logging.warning("Pre-scan failed, using default range: %s", e)
|
| 726 |
|
|
|
|
| 727 |
# queue_in: (frame_idx, frame_data)
|
| 728 |
# queue_out: (frame_idx, processed_frame, detections)
|
| 729 |
queue_in = Queue(maxsize=16)
|
| 730 |
+
# Tuning for A10: buffer at least 32 frames per GPU (batch size)
|
| 731 |
+
queue_out_max = max(64, (len(detectors) if detectors else 1) * 32)
|
| 732 |
queue_out = Queue(maxsize=queue_out_max)
|
| 733 |
|
| 734 |
# 6. Worker Function (Unified)
|
models/depth_estimators/depth_anything_v2.py
CHANGED
|
@@ -14,7 +14,7 @@ class DepthAnythingV2Estimator(DepthEstimator):
|
|
| 14 |
|
| 15 |
name = "depth"
|
| 16 |
supports_batch = True
|
| 17 |
-
max_batch_size =
|
| 18 |
|
| 19 |
def _resize_depth(self, raw_depth, height, width):
|
| 20 |
if raw_depth.dim() == 2:
|
|
|
|
| 14 |
|
| 15 |
name = "depth"
|
| 16 |
supports_batch = True
|
| 17 |
+
max_batch_size = 16
|
| 18 |
|
| 19 |
def _resize_depth(self, raw_depth, height, width):
|
| 20 |
if raw_depth.dim() == 2:
|
models/detectors/detr.py
CHANGED
|
@@ -27,7 +27,7 @@ class DetrDetector(ObjectDetector):
|
|
| 27 |
self.model.eval()
|
| 28 |
|
| 29 |
supports_batch = True
|
| 30 |
-
max_batch_size =
|
| 31 |
|
| 32 |
def _parse_single_result(self, processed) -> DetectionResult:
|
| 33 |
boxes = processed["boxes"].cpu().numpy()
|
|
|
|
| 27 |
self.model.eval()
|
| 28 |
|
| 29 |
supports_batch = True
|
| 30 |
+
max_batch_size = 8
|
| 31 |
|
| 32 |
def _parse_single_result(self, processed) -> DetectionResult:
|
| 33 |
boxes = processed["boxes"].cpu().numpy()
|
models/detectors/drone_yolo.py
CHANGED
|
@@ -14,7 +14,7 @@ class DroneYoloDetector(ObjectDetector):
|
|
| 14 |
|
| 15 |
REPO_ID = "rujutashashikanjoshi/yolo12-drone-detection-0205-100m"
|
| 16 |
supports_batch = True
|
| 17 |
-
max_batch_size =
|
| 18 |
|
| 19 |
def __init__(self, score_threshold: float = 0.3, device: str = None) -> None:
|
| 20 |
self.name = "drone_yolo"
|
|
|
|
| 14 |
|
| 15 |
REPO_ID = "rujutashashikanjoshi/yolo12-drone-detection-0205-100m"
|
| 16 |
supports_batch = True
|
| 17 |
+
max_batch_size = 32
|
| 18 |
|
| 19 |
def __init__(self, score_threshold: float = 0.3, device: str = None) -> None:
|
| 20 |
self.name = "drone_yolo"
|
models/detectors/yolov8.py
CHANGED
|
@@ -15,7 +15,7 @@ class HuggingFaceYoloV8Detector(ObjectDetector):
|
|
| 15 |
REPO_ID = "spencercdz/YOLOv8m_defence"
|
| 16 |
WEIGHT_FILE = "yolov8m_defence.pt"
|
| 17 |
supports_batch = True
|
| 18 |
-
max_batch_size =
|
| 19 |
|
| 20 |
def __init__(self, score_threshold: float = 0.3, device: str = None) -> None:
|
| 21 |
self.name = "hf_yolov8"
|
|
|
|
| 15 |
REPO_ID = "spencercdz/YOLOv8m_defence"
|
| 16 |
WEIGHT_FILE = "yolov8m_defence.pt"
|
| 17 |
supports_batch = True
|
| 18 |
+
max_batch_size = 32
|
| 19 |
|
| 20 |
def __init__(self, score_threshold: float = 0.3, device: str = None) -> None:
|
| 21 |
self.name = "hf_yolov8"
|
models/segmenters/sam3.py
CHANGED
|
@@ -56,7 +56,7 @@ class SAM3Segmenter(Segmenter):
|
|
| 56 |
logging.info("SAM3 model loaded successfully")
|
| 57 |
|
| 58 |
supports_batch = True
|
| 59 |
-
max_batch_size =
|
| 60 |
|
| 61 |
def _parse_single_result(self, results, frame_shape) -> SegmentationResult:
|
| 62 |
# Extract results
|
|
|
|
| 56 |
logging.info("SAM3 model loaded successfully")
|
| 57 |
|
| 58 |
supports_batch = True
|
| 59 |
+
max_batch_size = 8
|
| 60 |
|
| 61 |
def _parse_single_result(self, results, frame_shape) -> SegmentationResult:
|
| 62 |
# Extract results
|