Spaces:
Sleeping
Sleeping
Zhen Ye
committed on
Commit
·
89b854c
1
Parent(s):
fe670b9
fix: Increase queue buffer to absorb GPT latency
Browse files · inference.py (+4, −3)
inference.py
CHANGED
|
@@ -972,7 +972,8 @@ def run_inference(
|
|
| 972 |
# queue_out: (frame_idx, processed_frame, detections)
|
| 973 |
queue_in = Queue(maxsize=16)
|
| 974 |
# Tuning for A10: buffer at least 32 frames per GPU (batch size)
|
| 975 |
-
|
|
|
|
| 976 |
queue_out = Queue(maxsize=queue_out_max)
|
| 977 |
|
| 978 |
|
|
@@ -1122,8 +1123,8 @@ def run_inference(
|
|
| 1122 |
# However, if 'buffer' grows too large (because we are missing next_idx),
|
| 1123 |
# we are effectively unbounded again if queue_out fills up with future frames.
|
| 1124 |
# So we should monitor buffer size.
|
| 1125 |
-
if len(buffer) > 200:
|
| 1126 |
-
logging.warning("Writer buffer large (%d items), waiting for frame %d...", len(buffer), next_idx)
|
| 1127 |
|
| 1128 |
item = queue_out.get(timeout=1.0) # wait
|
| 1129 |
|
|
|
|
| 972 |
# queue_out: (frame_idx, processed_frame, detections)
|
| 973 |
queue_in = Queue(maxsize=16)
|
| 974 |
# Tuning for A10: buffer at least 32 frames per GPU (batch size)
|
| 975 |
+
# GPT Latency Buffer: GPT takes ~3s. At 30fps, that's 90 frames. We need to absorb this burst.
|
| 976 |
+
queue_out_max = max(512, (len(detectors) if detectors else 1) * 64)
|
| 977 |
queue_out = Queue(maxsize=queue_out_max)
|
| 978 |
|
| 979 |
|
|
|
|
| 1123 |
# However, if 'buffer' grows too large (because we are missing next_idx),
|
| 1124 |
# we are effectively unbounded again if queue_out fills up with future frames.
|
| 1125 |
# So we should monitor buffer size.
|
| 1126 |
+
if len(buffer) > 200 and len(buffer) % 50 == 0:
|
| 1127 |
+
logging.warning("Writer buffer large (%d items), waiting for frame %d (GPT Latency?)...", len(buffer), next_idx)
|
| 1128 |
|
| 1129 |
item = queue_out.get(timeout=1.0) # wait
|
| 1130 |
|