Zhen Ye committed on
Commit
89b854c
·
1 Parent(s): fe670b9

fix: Increase queue buffer to absorb GPT latency

Browse files
Files changed (1) hide show
  1. inference.py +4 -3
inference.py CHANGED
@@ -972,7 +972,8 @@ def run_inference(
972
  # queue_out: (frame_idx, processed_frame, detections)
973
  queue_in = Queue(maxsize=16)
974
  # Tuning for A10: buffer at least 32 frames per GPU (batch size)
975
- queue_out_max = max(64, (len(detectors) if detectors else 1) * 32)
 
976
  queue_out = Queue(maxsize=queue_out_max)
977
 
978
 
@@ -1122,8 +1123,8 @@ def run_inference(
1122
  # However, if 'buffer' grows too large (because we are missing next_idx),
1123
  # we are effectively unbounded again if queue_out fills up with future frames.
1124
  # So we should monitor buffer size.
1125
- if len(buffer) > 64:
1126
- logging.warning("Writer buffer large (%d items), waiting for frame %d...", len(buffer), next_idx)
1127
 
1128
  item = queue_out.get(timeout=1.0) # wait
1129
 
 
972
  # queue_out: (frame_idx, processed_frame, detections)
973
  queue_in = Queue(maxsize=16)
974
  # Tuning for A10: buffer at least 32 frames per GPU (batch size)
975
+ # GPT Latency Buffer: GPT takes ~3s. At 30fps, that's 90 frames. We need to absorb this burst.
976
+ queue_out_max = max(512, (len(detectors) if detectors else 1) * 64)
977
  queue_out = Queue(maxsize=queue_out_max)
978
 
979
 
 
1123
  # However, if 'buffer' grows too large (because we are missing next_idx),
1124
  # we are effectively unbounded again if queue_out fills up with future frames.
1125
  # So we should monitor buffer size.
1126
+ if len(buffer) > 200 and len(buffer) % 50 == 0:
1127
+ logging.warning("Writer buffer large (%d items), waiting for frame %d (GPT Latency?)...", len(buffer), next_idx)
1128
 
1129
  item = queue_out.get(timeout=1.0) # wait
1130