Zhen Ye committed on
Commit
6375955
·
1 Parent(s): ecbbe4e

Simplify first-frame processing, replace depth pre-scan with incremental stats, add GPT enrichment thread

Browse files
Files changed (2) hide show
  1. inference.py +213 -413
  2. jobs/background.py +1 -0
inference.py CHANGED
@@ -336,6 +336,50 @@ class SpeedEstimator:
336
  det['angle_deg'] = angle # 0 is right, 90 is down (screen space)
337
 
338
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
339
  _MODEL_LOCKS: Dict[str, RLock] = {}
340
  _MODEL_LOCKS_GUARD = RLock()
341
  _DEPTH_SCALE = float(os.getenv("DEPTH_SCALE", "25.0"))
@@ -713,145 +757,31 @@ def process_first_frame(
713
  mode: str,
714
  detector_name: Optional[str] = None,
715
  segmenter_name: Optional[str] = None,
716
- depth_estimator_name: Optional[str] = None,
717
- depth_scale: Optional[float] = None,
718
- enable_depth_estimator: bool = False,
719
- enable_gpt: bool = True, # ENABLED BY DEFAULT
720
- mission_spec=None, # Optional[MissionSpecification]
721
- ) -> Tuple[np.ndarray, List[Dict[str, Any]], Optional[np.ndarray], Optional[Dict[str, Any]]]:
 
 
 
722
  frame, _, _, _ = extract_first_frame(video_path)
723
  if mode == "segmentation":
724
  processed, _ = infer_segmentation_frame(
725
  frame, text_queries=queries, segmenter_name=segmenter_name
726
  )
727
- return processed, [], None, None
728
 
729
  processed, detections = infer_frame(
730
  frame, queries, detector_name=detector_name
731
  )
732
 
733
- # --- RELEVANCE GATE (between detection and GPT) ---
734
- if mission_spec:
735
- if mission_spec.parse_mode == "FAST_PATH":
736
- # Deterministic gate (unchanged)
737
- relevant_dets = []
738
- for det in detections:
739
- decision = evaluate_relevance(det, mission_spec.relevance_criteria)
740
- det["mission_relevant"] = decision.relevant
741
- det["relevance_reason"] = decision.reason
742
- if decision.relevant:
743
- relevant_dets.append(det)
744
- else:
745
- logging.info(
746
- json_module.dumps({
747
- "event": "relevance_decision",
748
- "label": det.get("label"),
749
- "relevant": False,
750
- "reason": decision.reason,
751
- "required_classes": mission_spec.relevance_criteria.required_classes,
752
- "frame": 0,
753
- })
754
- )
755
- gpt_input_dets = relevant_dets
756
- else:
757
- # LLM_EXTRACTED: post-filter with GPT on frame 0
758
- unique_labels = list({
759
- d.get("label", "").lower()
760
- for d in detections if d.get("label")
761
- })
762
- relevant_labels = evaluate_relevance_llm(
763
- unique_labels, mission_spec.operator_text
764
- )
765
-
766
- # Cache GPT-approved labels into relevance_criteria for subsequent frames
767
- mission_spec.relevance_criteria.required_classes = list(relevant_labels)
768
-
769
- for det in detections:
770
- label = (det.get("label") or "").lower()
771
- is_relevant = label in relevant_labels
772
- det["mission_relevant"] = is_relevant
773
- det["relevance_reason"] = "ok" if is_relevant else "llm_excluded"
774
- if not is_relevant:
775
- logging.info(
776
- json_module.dumps({
777
- "event": "relevance_decision",
778
- "label": det.get("label"),
779
- "relevant": False,
780
- "reason": "llm_excluded",
781
- "relevant_labels": list(relevant_labels),
782
- "frame": 0,
783
- })
784
- )
785
- gpt_input_dets = [d for d in detections if d.get("mission_relevant")]
786
- else:
787
- # LEGACY mode: all detections pass, tagged as unresolved
788
- for det in detections:
789
- det["mission_relevant"] = None
790
- gpt_input_dets = detections
791
-
792
- # 1. Synchronous Depth Estimation (HF Backend)
793
- depth_map = None
794
- # If a specific depth estimator is requested OR if generic "enable" flag is on
795
- should_run_depth = (depth_estimator_name is not None) or enable_depth_estimator
796
-
797
- if should_run_depth and detections:
798
- try:
799
- # Resolve name: if none given, default to "depth"
800
- d_name = depth_estimator_name if depth_estimator_name else "depth"
801
- scale = depth_scale if depth_scale is not None else 1.0
802
-
803
- logging.info(f"Running synchronous depth estimation with {d_name} (scale={scale})...")
804
- estimator = load_depth_estimator(d_name)
805
-
806
- # Run prediction
807
- with _get_model_lock("depth", estimator.name):
808
- result = estimator.predict(frame)
809
-
810
- depth_map = result.depth_map
811
-
812
- # Compute per-detection depth metrics
813
- detections = compute_depth_per_detection(depth_map, detections, scale)
814
-
815
- except Exception as e:
816
- logging.exception(f"First frame depth failed: {e}")
817
- # Mark all detections as depth_valid=False just in case
818
- for det in detections:
819
- det["depth_est_m"] = None
820
- det["depth_rel"] = None
821
- det["depth_valid"] = False
822
-
823
- # 2. GPT-based Distance/Direction Estimation (Explicitly enabled)
824
- # Only assess mission-relevant detections
825
- gpt_results = None
826
- if enable_gpt and gpt_input_dets:
827
- try:
828
- frame_b64 = encode_frame_to_b64(frame)
829
- gpt_results = estimate_threat_gpt(
830
- detections=gpt_input_dets, mission_spec=mission_spec,
831
- image_b64=frame_b64,
832
- )
833
- logging.info(f"GPT Output for First Frame:\n{gpt_results}")
834
-
835
- # Merge GPT results into detections (polyfilled keys from gpt_reasoning)
836
- for i, det in enumerate(gpt_input_dets):
837
- obj_id = f"T{str(i+1).zfill(2)}"
838
- if obj_id in gpt_results:
839
- info = gpt_results[obj_id]
840
- det.update(info)
841
- det["gpt_raw"] = info
842
- # Provenance: tag assessment frame
843
- det["assessment_frame_index"] = 0
844
- det["assessment_status"] = "ASSESSED"
845
-
846
- except Exception as e:
847
- logging.error(f"GPT Threat estimation failed: {e}")
848
-
849
- # Tag unassessed detections (INV-6: distinct from score 0)
850
  for det in detections:
851
- if "assessment_status" not in det:
852
- det["assessment_status"] = "UNASSESSED"
853
 
854
- return processed, detections, depth_map, gpt_results
855
 
856
 
857
  def run_inference(
@@ -867,6 +797,7 @@ def run_inference(
867
  stream_queue: Optional[Queue] = None,
868
  mission_spec=None, # Optional[MissionSpecification]
869
  first_frame_gpt_results: Optional[Dict[str, Any]] = None,
 
870
  ) -> Tuple[str, List[List[Dict[str, Any]]]]:
871
 
872
  # 1. Setup Video Reader
@@ -936,60 +867,8 @@ def run_inference(
936
  else:
937
  depth_estimators.append(None)
938
 
939
- # 4. Phase 1: Pre-Scan (Depth Normalization Stats) - ONLY IF DEPTH ENABLED
940
- global_min, global_max = 0.0, 1.0
941
- if depth_estimator_name and depth_estimators[0]:
942
- logging.info("Starting Phase 1: Pre-scan for depth stats...")
943
-
944
- # We need a quick scan logic here.
945
- # Since we have loaded models, we can use one of them to scan a few frames.
946
- # Let's pick 0-th GPU model.
947
- scan_est = depth_estimators[0]
948
- scan_values = []
949
-
950
- # Sample frames: First 10, Middle 10, Last 10
951
- target_indices = set(list(range(0, 10)) +
952
- list(range(total_frames//2, total_frames//2 + 10)) +
953
- list(range(max(0, total_frames-10), total_frames)))
954
- target_indices = sorted([i for i in target_indices if i < total_frames])
955
-
956
- try:
957
- # Quick reader scan
958
- reader_scan = AsyncVideoReader(input_video_path)
959
- scan_frames = []
960
- for i, frame in enumerate(reader_scan):
961
- if i in target_indices:
962
- scan_frames.append(frame)
963
- if i > max(target_indices):
964
- break
965
- reader_scan.close()
966
-
967
- # Predict
968
- with scan_est.lock:
969
- # Batch if supported, else loop
970
- if scan_est.supports_batch and scan_frames:
971
- scan_res = scan_est.predict_batch(scan_frames)
972
- else:
973
- scan_res = [scan_est.predict(f) for f in scan_frames]
974
-
975
- for r in scan_res:
976
- if r.depth_map is not None:
977
- scan_values.append(r.depth_map)
978
-
979
- # Stats
980
- if scan_values:
981
- all_vals = np.concatenate([v.ravel() for v in scan_values])
982
- valid = all_vals[np.isfinite(all_vals)]
983
- if valid.size > 0:
984
- global_min = float(np.percentile(valid, 1))
985
- global_max = float(np.percentile(valid, 99))
986
- # Prevent zero range
987
- if abs(global_max - global_min) < 1e-6: global_max = global_min + 1.0
988
-
989
- logging.info("Global Depth Range: %.2f - %.2f", global_min, global_max)
990
-
991
- except Exception as e:
992
- logging.warning("Pre-scan failed, using default range: %s", e)
993
 
994
  # queue_in: (frame_idx, frame_data)
995
  # queue_out: (frame_idx, processed_frame, detections)
@@ -1021,19 +900,35 @@ def run_inference(
1021
  frames = [item[1] for item in batch_accum]
1022
 
1023
  # --- UNIFIED INFERENCE ---
1024
- # Run detection batch
1025
- try:
1026
- if detector_instance.supports_batch:
1027
- with detector_instance.lock:
1028
- det_results = detector_instance.predict_batch(frames, queries)
1029
- else:
1030
- with detector_instance.lock:
1031
- det_results = [detector_instance.predict(f, queries) for f in frames]
1032
- except BaseException as e:
1033
- logging.exception("Batch detection crashed with critical error")
1034
- det_results = [None] * len(frames)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1035
 
1036
- # Run depth batch (if enabled)
1037
  depth_results = [None] * len(frames)
1038
  if depth_instance and depth_estimator_name:
1039
  try:
@@ -1045,21 +940,32 @@ def run_inference(
1045
  except BaseException as e:
1046
  logging.exception("Batch depth crashed with critical error")
1047
 
 
 
 
 
 
 
1048
  # --- POST PROCESSING ---
1049
- for i, (idx, frame, d_res, dep_res) in enumerate(zip(indices, frames, det_results, depth_results)):
1050
- # 1. Detections
1051
  detections = []
1052
- if d_res:
1053
- detections = _build_detection_records(
1054
- d_res.boxes, d_res.scores, d_res.labels, queries, d_res.label_names
1055
- )
1056
-
 
 
 
 
1057
  # 2. Frame Rendering
1058
  processed = frame.copy()
1059
-
1060
  # A. Render Depth Heatmap (if enabled)
1061
  if dep_res and dep_res.depth_map is not None:
1062
- processed = colorize_depth_map(dep_res.depth_map, global_min, global_max)
 
1063
  try:
1064
  _attach_depth_from_result(detections, dep_res, depth_scale)
1065
  except: pass
@@ -1137,15 +1043,76 @@ def run_inference(
1137
  # writer_finished = False
1138
 
1139
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1140
  def writer_loop():
1141
  nonlocal writer_finished
1142
  next_idx = 0
1143
  buffer = {}
1144
-
1145
  # Initialize Tracker & Speed Estimator
1146
  tracker = ByteTracker(frame_rate=fps)
1147
  speed_est = SpeedEstimator(fps=fps)
1148
- llm_filtered = False # LLM post-filter runs once on frame 0
 
 
 
 
1149
 
1150
  try:
1151
  with StreamingVideoWriter(output_video_path, fps, width, height) as writer:
@@ -1171,34 +1138,7 @@ def run_inference(
1171
  dets = tracker.update(dets)
1172
  speed_est.estimate(dets)
1173
 
1174
- # --- LLM POST-FILTER (frame 0 only, LLM_EXTRACTED mode) ---
1175
- if (mission_spec
1176
- and mission_spec.parse_mode == "LLM_EXTRACTED"
1177
- and not llm_filtered
1178
- and next_idx == 0):
1179
- # Skip if process_first_frame already populated required_classes
1180
- if mission_spec.relevance_criteria.required_classes:
1181
- logging.info(
1182
- "LLM post-filter already applied by process_first_frame: classes=%s",
1183
- mission_spec.relevance_criteria.required_classes,
1184
- )
1185
- else:
1186
- unique_labels = list({
1187
- d.get("label", "").lower()
1188
- for d in dets if d.get("label")
1189
- })
1190
- relevant_labels = evaluate_relevance_llm(
1191
- unique_labels, mission_spec.operator_text
1192
- )
1193
- # Cache into relevance_criteria for all subsequent frames
1194
- mission_spec.relevance_criteria.required_classes = list(relevant_labels)
1195
- logging.info(
1196
- "LLM post-filter applied on frame 0: relevant=%s",
1197
- relevant_labels,
1198
- )
1199
- llm_filtered = True
1200
-
1201
- # --- RELEVANCE GATE (deterministic, uses updated criteria) ---
1202
  if mission_spec:
1203
  for d in dets:
1204
  decision = evaluate_relevance(d, mission_spec.relevance_criteria)
@@ -1222,43 +1162,27 @@ def run_inference(
1222
  d["mission_relevant"] = None
1223
  gpt_dets = dets
1224
 
1225
- # --- GPT ESTIMATION (Frame 0 Only) ---
1226
- if next_idx == 0 and enable_gpt and gpt_dets:
 
 
 
1227
  try:
1228
- if first_frame_gpt_results:
1229
- # Re-use GPT results from process_first_frame (avoid duplicate call)
1230
- logging.info("Re-using GPT results from first-frame processing (skipping duplicate call)")
1231
- gpt_res = first_frame_gpt_results
1232
- else:
1233
- logging.info("Running GPT estimation for video start (Frame 0)...")
1234
- frame_b64 = encode_frame_to_b64(p_frame)
1235
- gpt_res = estimate_threat_gpt(
1236
- detections=gpt_dets, mission_spec=mission_spec,
1237
- image_b64=frame_b64,
1238
- )
1239
-
1240
- # Merge using real track_id assigned by ByteTracker
1241
- for d in gpt_dets:
1242
- oid = d.get('track_id')
1243
- if oid and oid in gpt_res:
1244
- d.update(gpt_res[oid])
1245
- d["gpt_raw"] = gpt_res[oid]
1246
- d["assessment_frame_index"] = 0
1247
- d["assessment_status"] = "ASSESSED"
1248
-
1249
- # Push GPT data back into tracker's internal STrack objects
1250
- tracker.inject_metadata(gpt_dets)
1251
-
1252
- except Exception as e:
1253
- logging.error("GPT failed for Frame 0: %s", e)
1254
 
1255
  # Tag unassessed detections (INV-6)
1256
  for d in dets:
1257
  if "assessment_status" not in d:
1258
  d["assessment_status"] = "UNASSESSED"
1259
-
1260
  # --- RENDER BOXES & OVERLAYS ---
1261
- # We need to convert list of dicts back to boxes array for draw_boxes
1262
  if dets:
1263
  display_boxes = np.array([d['bbox'] for d in dets])
1264
  display_labels = []
@@ -1267,81 +1191,42 @@ def run_inference(
1267
  # Append Track ID
1268
  if 'track_id' in d:
1269
  lbl = f"{d['track_id']} {lbl}"
1270
- # Speed display removed per user request
1271
- # if 'speed_kph' in d and d['speed_kph'] > 1.0:
1272
- # lbl += f" {int(d['speed_kph'])}km/h"
1273
- # Distance display removed per user request
1274
- # if d.get('gpt_distance_m'):
1275
- # lbl += f" {int(d['gpt_distance_m'])}m"
1276
-
1277
  display_labels.append(lbl)
1278
-
1279
  p_frame = draw_boxes(p_frame, display_boxes, label_names=display_labels)
1280
-
1281
  writer.write(p_frame)
1282
-
1283
  if stream_queue:
1284
  try:
1285
- # Send TRACKED detections to frontend for overlay
1286
- # We need to attach them to the frame or send separately?
1287
- # The stream_queue expects 'p_frame' which is an image.
1288
- # The frontend polls for 'async job' status which returns video, but
1289
- # we also want live updates during streaming?
1290
- # Currently streaming is just Mjpeg of p_frame.
1291
- stream_queue.put(p_frame, timeout=0.01)
1292
  except:
1293
  pass
1294
 
1295
  all_detections_map[next_idx] = dets
1296
-
1297
  # Store tracks for frontend access
1298
  if job_id:
1299
  set_track_data(job_id, next_idx, dets)
1300
 
1301
  next_idx += 1
1302
-
1303
  if next_idx % 30 == 0:
1304
  logging.debug("Wrote frame %d/%d", next_idx, total_frames)
1305
-
1306
  except Exception as e:
1307
  logging.error(f"Writer loop processing error at index {next_idx}: {e}")
1308
- # Important: If we failed AFTER popping from buffer, we must increment next_idx to avoid infinite loop
1309
- # How do we know? We can check if next_idx is in buffer.
1310
- # If we popped it, it's not in buffer.
1311
- # But wait, next_idx is used for loop condition.
1312
- # If we successfully popped it but failed later, we lost the frame.
1313
- # We should increment next_idx to skip it.
1314
-
1315
- # Heuristic: If we are here, something failed.
1316
- # If we haven't successfully written/processed, we should probably skip this frame processing
1317
- # to let the loop continue to next frame.
1318
- # But we need to make sure we don't skip if the error was just "queue empty" (timeout).
1319
-
1320
- # Wait, queue_out.get raises Empty. 'Empty' is NOT Exception?
1321
- # In Python 'queue.Empty' inherits form Exception?
1322
- # Actually 'queue.Empty' exception is just 'Exception'.
1323
- # Let's check imports. from queue import Empty.
1324
- # Yes.
1325
-
1326
- # We should catch Empty explicitly?
1327
- # No, get(timeout=1.0) raises Empty.
1328
-
1329
- # If the error is NOT Empty, then it's a real crash.
1330
  if "Empty" not in str(type(e)):
1331
  logging.error(f"CRITICAL WRITER ERROR: {e}")
1332
- # Force skip frame if we suspect we are stuck
1333
- # Only if we hold the lock/state?
1334
- # Simpler: Just try to proceed.
1335
- # If we popped the frame, next_idx should be incremented?
1336
- # Actually we can't easily know if we popped.
1337
- # But we can check if we are stuck on the same index for too long?
1338
- pass
1339
 
1340
  # Check cancellation or timeout
1341
- if job_id and _check_cancellation(job_id): # This raises
1342
  pass
1343
  if not any(w.is_alive() for w in workers) and queue_out.empty():
1344
- # Workers dead, queue empty, but not finished? prevent infinite loop
1345
  logging.error("Workers stopped unexpectedly.")
1346
  break
1347
  continue
@@ -1349,6 +1234,12 @@ def run_inference(
1349
  logging.exception("Writer loop failed")
1350
  finally:
1351
  logging.info("Writer loop finished. Wrote %d frames (target %d)", next_idx, total_frames)
 
 
 
 
 
 
1352
  writer_finished = True
1353
 
1354
  writer_thread = Thread(target=writer_loop, daemon=True)
@@ -1698,105 +1589,8 @@ def run_depth_inference(
1698
  est.lock = RLock()
1699
  estimators.append(est)
1700
 
1701
- # 3. Phase 1: Pre-scan for Stats
1702
- # We sample ~5% of frames or at least 20 frames distributed evenly
1703
- stride = max(1, total_frames // 20)
1704
- logging.info("Starting Phase 1: Pre-scan (stride=%d)...", stride)
1705
-
1706
- scan_values = []
1707
-
1708
- def scan_task(gpu_idx: int, frame_data: np.ndarray):
1709
- est = estimators[gpu_idx]
1710
- with est.lock:
1711
- result = est.predict(frame_data)
1712
- return result.depth_map
1713
-
1714
- # Run scan
1715
- # We can just run this sequentially or with pool? Pool is better.
1716
- # We need to construct a list of frames to scan.
1717
- scan_indices = list(range(0, total_frames, stride))
1718
-
1719
- # We need to read specific frames. VideoReader is sequential.
1720
- # So we iterate and skip.
1721
- scan_frames = []
1722
-
1723
- # Optimization: If total frames is huge, reading simply to skip might be slow?
1724
- # VideoReader uses cv2.read() which decodes.
1725
- # If we need random access, we should use set(cv2.CAP_PROP_POS_FRAMES).
1726
- # But for now, simple skip logic:
1727
-
1728
- current_idx = 0
1729
- # To avoid re-opening multiple times or complex seeking, let's just use the Reader
1730
- # and skip if not in indices.
1731
- # BUT, if video is 1 hour, skipping 99% frames is wastage of decode.
1732
- # Re-opening with set POS is better for sparse sampling.
1733
-
1734
- # Actually, for robustness, let's just stick to VideoReader sequential read but only process selective frames.
1735
- # If the video is truly huge, we might want to optimize this later.
1736
- # Given the constraints, let's just scan the first N frames + some middle ones?
1737
- # User agreed to "Small startup delay".
1738
-
1739
- # Let's try to just grab the frames we want.
1740
- scan_frames_data = []
1741
-
1742
- # Just grab first 50 frames? No, distribution is better.
1743
- # Let's use a temporary reader for scanning
1744
-
1745
- try:
1746
- from concurrent.futures import as_completed
1747
-
1748
- # Simple Approach: Process first 30 frames to get a baseline.
1749
- # This is usually enough for a "rough" estimation unless scenes change drastically.
1750
- # But for stability, spread is better.
1751
-
1752
- # Let's read first 10, middle 10, last 10.
1753
- target_indices = set(list(range(0, 10)) +
1754
- list(range(total_frames//2, total_frames//2 + 10)) +
1755
- list(range(max(0, total_frames-10), total_frames)))
1756
-
1757
- # Filter valid
1758
- target_indices = sorted([i for i in target_indices if i < total_frames])
1759
-
1760
- # Manual read with seek is tricky with cv2 (unreliable keyframes).
1761
- # We will iterate and pick.
1762
-
1763
- cnt = 0
1764
- reader_scan = AsyncVideoReader(input_video_path)
1765
- for i, frame in enumerate(reader_scan):
1766
- if i in target_indices:
1767
- scan_frames_data.append(frame)
1768
- if i > max(target_indices):
1769
- break
1770
- reader_scan.close()
1771
-
1772
- # Run inference on these frames
1773
- with ThreadPoolExecutor(max_workers=min(len(estimators)*2, 8)) as pool:
1774
- futures = []
1775
- for i, frm in enumerate(scan_frames_data):
1776
- gpu = i % len(estimators)
1777
- futures.append(pool.submit(scan_task, gpu, frm))
1778
-
1779
- for f in as_completed(futures):
1780
- dm = f.result()
1781
- scan_values.append(dm)
1782
-
1783
- except Exception as e:
1784
- logging.warning("Pre-scan failed, falling back to default range: %s", e)
1785
-
1786
- # Compute stats
1787
- global_min, global_max = 0.0, 1.0
1788
- if scan_values:
1789
- all_vals = np.concatenate([v.ravel() for v in scan_values])
1790
- valid = all_vals[np.isfinite(all_vals)]
1791
- if valid.size > 0:
1792
- global_min = float(np.percentile(valid, 1))
1793
- global_max = float(np.percentile(valid, 99))
1794
-
1795
- # Safety
1796
- if abs(global_max - global_min) < 1e-6:
1797
- global_max = global_min + 1.0
1798
-
1799
- logging.info("Global Depth Range: %.2f - %.2f", global_min, global_max)
1800
 
1801
  # 4. Phase 2: Streaming Inference
1802
  logging.info("Starting Phase 2: Streaming...")
@@ -1826,10 +1620,16 @@ def run_depth_inference(
1826
  with est.lock:
1827
  results = [est.predict(f) for f in frames]
1828
 
 
 
 
 
 
1829
  # 2. Post-process loop
1830
  for idx, frm, res in zip(indices, frames, results):
1831
  depth_map = res.depth_map
1832
- colored = colorize_depth_map(depth_map, global_min, global_max)
 
1833
 
1834
  # Overlay Detections
1835
  if detections and idx < len(detections):
 
336
  det['angle_deg'] = angle # 0 is right, 90 is down (screen space)
337
 
338
 
339
+ class IncrementalDepthStats:
340
+ """Thread-safe incremental depth range estimator.
341
+
342
+ Collects depth statistics frame-by-frame so the expensive pre-scan
343
+ (opening a second video reader) can be eliminated. Before
344
+ ``warmup_frames`` updates the range defaults to (0.0, 1.0).
345
+ """
346
+
347
+ def __init__(self, warmup_frames: int = 30):
348
+ self._lock = RLock()
349
+ self._warmup = warmup_frames
350
+ self._count = 0
351
+ self._global_min = float("inf")
352
+ self._global_max = float("-inf")
353
+
354
+ def update(self, depth_map: np.ndarray) -> None:
355
+ if depth_map is None or depth_map.size == 0:
356
+ return
357
+ finite = depth_map[np.isfinite(depth_map)]
358
+ if finite.size == 0:
359
+ return
360
+ lo = float(np.percentile(finite, 1))
361
+ hi = float(np.percentile(finite, 99))
362
+ with self._lock:
363
+ self._global_min = min(self._global_min, lo)
364
+ self._global_max = max(self._global_max, hi)
365
+ self._count += 1
366
+
367
+ @property
368
+ def range(self) -> Tuple[float, float]:
369
+ with self._lock:
370
+ if self._count < self._warmup:
371
+ # Not enough data yet — use default range
372
+ if self._count == 0:
373
+ return (0.0, 1.0)
374
+ # Use what we have but may be less stable
375
+ lo, hi = self._global_min, self._global_max
376
+ else:
377
+ lo, hi = self._global_min, self._global_max
378
+ if abs(hi - lo) < 1e-6:
379
+ hi = lo + 1.0
380
+ return (lo, hi)
381
+
382
+
383
  _MODEL_LOCKS: Dict[str, RLock] = {}
384
  _MODEL_LOCKS_GUARD = RLock()
385
  _DEPTH_SCALE = float(os.getenv("DEPTH_SCALE", "25.0"))
 
757
  mode: str,
758
  detector_name: Optional[str] = None,
759
  segmenter_name: Optional[str] = None,
760
+ ) -> Tuple[np.ndarray, List[Dict[str, Any]]]:
761
+ """Lightweight first-frame processing: detection + rendering only.
762
+
763
+ GPT, depth, and LLM relevance are handled later in the async pipeline
764
+ (writer enrichment thread), avoiding 2-8s synchronous startup delay.
765
+
766
+ Returns:
767
+ (processed_frame, detections) — all detections tagged UNASSESSED.
768
+ """
769
  frame, _, _, _ = extract_first_frame(video_path)
770
  if mode == "segmentation":
771
  processed, _ = infer_segmentation_frame(
772
  frame, text_queries=queries, segmenter_name=segmenter_name
773
  )
774
+ return processed, []
775
 
776
  processed, detections = infer_frame(
777
  frame, queries, detector_name=detector_name
778
  )
779
 
780
+ # Tag all detections as unassessed — GPT runs later in enrichment thread
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
781
  for det in detections:
782
+ det["assessment_status"] = "UNASSESSED"
 
783
 
784
+ return processed, detections
785
 
786
 
787
  def run_inference(
 
797
  stream_queue: Optional[Queue] = None,
798
  mission_spec=None, # Optional[MissionSpecification]
799
  first_frame_gpt_results: Optional[Dict[str, Any]] = None,
800
+ first_frame_detections: Optional[List[Dict[str, Any]]] = None,
801
  ) -> Tuple[str, List[List[Dict[str, Any]]]]:
802
 
803
  # 1. Setup Video Reader
 
867
  else:
868
  depth_estimators.append(None)
869
 
870
+ # 4. Incremental Depth Stats (replaces expensive pre-scan)
871
+ depth_stats = IncrementalDepthStats(warmup_frames=30) if depth_estimator_name else None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
872
 
873
  # queue_in: (frame_idx, frame_data)
874
  # queue_out: (frame_idx, processed_frame, detections)
 
900
  frames = [item[1] for item in batch_accum]
901
 
902
  # --- UNIFIED INFERENCE ---
903
+ # Separate frame 0 if we have cached detections (avoid re-detecting)
904
+ cached_frame0 = None
905
+ detect_indices = indices
906
+ detect_frames = frames
907
+ if first_frame_detections is not None and 0 in indices:
908
+ f0_pos = indices.index(0)
909
+ cached_frame0 = (indices[f0_pos], frames[f0_pos])
910
+ detect_indices = indices[:f0_pos] + indices[f0_pos+1:]
911
+ detect_frames = frames[:f0_pos] + frames[f0_pos+1:]
912
+ logging.info("Worker %d: reusing cached detections for frame 0", gpu_idx)
913
+
914
+ # Run detection batch (excluding frame 0 if cached)
915
+ det_results_map = {}
916
+ if detect_frames:
917
+ try:
918
+ if detector_instance.supports_batch:
919
+ with detector_instance.lock:
920
+ raw_results = detector_instance.predict_batch(detect_frames, queries)
921
+ else:
922
+ with detector_instance.lock:
923
+ raw_results = [detector_instance.predict(f, queries) for f in detect_frames]
924
+ for di, dr in zip(detect_indices, raw_results):
925
+ det_results_map[di] = dr
926
+ except BaseException as e:
927
+ logging.exception("Batch detection crashed with critical error")
928
+ for di in detect_indices:
929
+ det_results_map[di] = None
930
 
931
+ # Run depth batch (if enabled) — always for all frames
932
  depth_results = [None] * len(frames)
933
  if depth_instance and depth_estimator_name:
934
  try:
 
940
  except BaseException as e:
941
  logging.exception("Batch depth crashed with critical error")
942
 
943
+ # Update incremental depth stats
944
+ if depth_stats is not None:
945
+ for dep_res in depth_results:
946
+ if dep_res and dep_res.depth_map is not None:
947
+ depth_stats.update(dep_res.depth_map)
948
+
949
  # --- POST PROCESSING ---
950
+ for i, (idx, frame, dep_res) in enumerate(zip(indices, frames, depth_results)):
951
+ # 1. Detections — use cached for frame 0 if available
952
  detections = []
953
+ if cached_frame0 is not None and idx == 0:
954
+ detections = [d.copy() for d in first_frame_detections]
955
+ else:
956
+ d_res = det_results_map.get(idx)
957
+ if d_res:
958
+ detections = _build_detection_records(
959
+ d_res.boxes, d_res.scores, d_res.labels, queries, d_res.label_names
960
+ )
961
+
962
  # 2. Frame Rendering
963
  processed = frame.copy()
964
+
965
  # A. Render Depth Heatmap (if enabled)
966
  if dep_res and dep_res.depth_map is not None:
967
+ ds_min, ds_max = depth_stats.range if depth_stats else (0.0, 1.0)
968
+ processed = colorize_depth_map(dep_res.depth_map, ds_min, ds_max)
969
  try:
970
  _attach_depth_from_result(detections, dep_res, depth_scale)
971
  except: pass
 
1043
  # writer_finished = False
1044
 
1045
 
1046
+ # --- GPT Enrichment Thread (non-blocking) ---
1047
+ # Runs LLM relevance + GPT threat assessment off the writer's critical path.
1048
+ gpt_enrichment_queue = Queue(maxsize=4)
1049
+
1050
+ def enrichment_thread_fn(tracker_ref):
1051
+ """Dedicated thread for GPT/LLM calls. Receives work from writer, injects results via tracker."""
1052
+ while True:
1053
+ item = gpt_enrichment_queue.get()
1054
+ if item is None:
1055
+ break # Sentinel — shutdown
1056
+ frame_idx, frame_data, gpt_dets, ms = item
1057
+ try:
1058
+ # LLM post-filter (LLM_EXTRACTED mode, frame 0 only)
1059
+ if (ms and ms.parse_mode == "LLM_EXTRACTED"
1060
+ and not ms.relevance_criteria.required_classes):
1061
+ unique_labels = list({
1062
+ d.get("label", "").lower()
1063
+ for d in gpt_dets if d.get("label")
1064
+ })
1065
+ relevant_labels = evaluate_relevance_llm(
1066
+ unique_labels, ms.operator_text
1067
+ )
1068
+ ms.relevance_criteria.required_classes = list(relevant_labels)
1069
+ logging.info(
1070
+ "Enrichment: LLM post-filter applied on frame %d: relevant=%s",
1071
+ frame_idx, relevant_labels,
1072
+ )
1073
+
1074
+ # GPT threat assessment
1075
+ if gpt_dets:
1076
+ if first_frame_gpt_results:
1077
+ logging.info("Enrichment: re-using cached GPT results for frame %d", frame_idx)
1078
+ gpt_res = first_frame_gpt_results
1079
+ else:
1080
+ logging.info("Enrichment: running GPT estimation for frame %d...", frame_idx)
1081
+ frame_b64 = encode_frame_to_b64(frame_data)
1082
+ gpt_res = estimate_threat_gpt(
1083
+ detections=gpt_dets, mission_spec=ms,
1084
+ image_b64=frame_b64,
1085
+ )
1086
+
1087
+ # Merge using real track_id assigned by ByteTracker
1088
+ for d in gpt_dets:
1089
+ oid = d.get('track_id')
1090
+ if oid and oid in gpt_res:
1091
+ d.update(gpt_res[oid])
1092
+ d["gpt_raw"] = gpt_res[oid]
1093
+ d["assessment_frame_index"] = frame_idx
1094
+ d["assessment_status"] = "ASSESSED"
1095
+
1096
+ # Push GPT data back into tracker's internal STrack objects
1097
+ tracker_ref.inject_metadata(gpt_dets)
1098
+ logging.info("Enrichment: GPT results injected into tracker for frame %d", frame_idx)
1099
+
1100
+ except Exception as e:
1101
+ logging.error("Enrichment thread failed for frame %d: %s", frame_idx, e)
1102
+
1103
  def writer_loop():
1104
  nonlocal writer_finished
1105
  next_idx = 0
1106
  buffer = {}
1107
+
1108
  # Initialize Tracker & Speed Estimator
1109
  tracker = ByteTracker(frame_rate=fps)
1110
  speed_est = SpeedEstimator(fps=fps)
1111
+ gpt_submitted = False # GPT enrichment submitted once for frame 0
1112
+
1113
+ # Start enrichment thread
1114
+ enrich_thread = Thread(target=enrichment_thread_fn, args=(tracker,), daemon=True)
1115
+ enrich_thread.start()
1116
 
1117
  try:
1118
  with StreamingVideoWriter(output_video_path, fps, width, height) as writer:
 
1138
  dets = tracker.update(dets)
1139
  speed_est.estimate(dets)
1140
 
1141
+ # --- RELEVANCE GATE (deterministic, fast stays in writer) ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1142
  if mission_spec:
1143
  for d in dets:
1144
  decision = evaluate_relevance(d, mission_spec.relevance_criteria)
 
1162
  d["mission_relevant"] = None
1163
  gpt_dets = dets
1164
 
1165
+ # --- GPT ENRICHMENT (non-blocking, offloaded to enrichment thread) ---
1166
+ if next_idx == 0 and enable_gpt and gpt_dets and not gpt_submitted:
1167
+ # Tag as pending — enrichment thread will update to ASSESSED later
1168
+ for d in gpt_dets:
1169
+ d["assessment_status"] = "PENDING_GPT"
1170
  try:
1171
+ gpt_enrichment_queue.put(
1172
+ (next_idx, p_frame.copy(), gpt_dets, mission_spec),
1173
+ timeout=1.0,
1174
+ )
1175
+ gpt_submitted = True
1176
+ logging.info("Writer: offloaded GPT enrichment for frame 0")
1177
+ except Full:
1178
+ logging.warning("GPT enrichment queue full, skipping frame 0 GPT")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1179
 
1180
  # Tag unassessed detections (INV-6)
1181
  for d in dets:
1182
  if "assessment_status" not in d:
1183
  d["assessment_status"] = "UNASSESSED"
1184
+
1185
  # --- RENDER BOXES & OVERLAYS ---
 
1186
  if dets:
1187
  display_boxes = np.array([d['bbox'] for d in dets])
1188
  display_labels = []
 
1191
  # Append Track ID
1192
  if 'track_id' in d:
1193
  lbl = f"{d['track_id']} {lbl}"
 
 
 
 
 
 
 
1194
  display_labels.append(lbl)
1195
+
1196
  p_frame = draw_boxes(p_frame, display_boxes, label_names=display_labels)
1197
+
1198
  writer.write(p_frame)
1199
+
1200
  if stream_queue:
1201
  try:
1202
+ from jobs.streaming import publish_frame as _publish
1203
+ if job_id:
1204
+ _publish(job_id, p_frame)
1205
+ else:
1206
+ stream_queue.put(p_frame, timeout=0.01)
 
 
1207
  except:
1208
  pass
1209
 
1210
  all_detections_map[next_idx] = dets
1211
+
1212
  # Store tracks for frontend access
1213
  if job_id:
1214
  set_track_data(job_id, next_idx, dets)
1215
 
1216
  next_idx += 1
1217
+
1218
  if next_idx % 30 == 0:
1219
  logging.debug("Wrote frame %d/%d", next_idx, total_frames)
1220
+
1221
  except Exception as e:
1222
  logging.error(f"Writer loop processing error at index {next_idx}: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1223
  if "Empty" not in str(type(e)):
1224
  logging.error(f"CRITICAL WRITER ERROR: {e}")
 
 
 
 
 
 
 
1225
 
1226
  # Check cancellation or timeout
1227
+ if job_id and _check_cancellation(job_id):
1228
  pass
1229
  if not any(w.is_alive() for w in workers) and queue_out.empty():
 
1230
  logging.error("Workers stopped unexpectedly.")
1231
  break
1232
  continue
 
1234
  logging.exception("Writer loop failed")
1235
  finally:
1236
  logging.info("Writer loop finished. Wrote %d frames (target %d)", next_idx, total_frames)
1237
+ # Shut down enrichment thread
1238
+ try:
1239
+ gpt_enrichment_queue.put(None, timeout=5.0)
1240
+ enrich_thread.join(timeout=30)
1241
+ except Exception:
1242
+ logging.warning("Enrichment thread shutdown timed out")
1243
  writer_finished = True
1244
 
1245
  writer_thread = Thread(target=writer_loop, daemon=True)
 
1589
  est.lock = RLock()
1590
  estimators.append(est)
1591
 
1592
+ # 3. Incremental Depth Stats (replaces expensive pre-scan)
1593
+ depth_stats = IncrementalDepthStats(warmup_frames=30)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1594
 
1595
  # 4. Phase 2: Streaming Inference
1596
  logging.info("Starting Phase 2: Streaming...")
 
1620
  with est.lock:
1621
  results = [est.predict(f) for f in frames]
1622
 
1623
+ # Update incremental depth stats
1624
+ for res in results:
1625
+ if res and res.depth_map is not None:
1626
+ depth_stats.update(res.depth_map)
1627
+
1628
  # 2. Post-process loop
1629
  for idx, frm, res in zip(indices, frames, results):
1630
  depth_map = res.depth_map
1631
+ ds_min, ds_max = depth_stats.range
1632
+ colored = colorize_depth_map(depth_map, ds_min, ds_max)
1633
 
1634
  # Overlay Detections
1635
  if detections and idx < len(detections):
jobs/background.py CHANGED
@@ -54,6 +54,7 @@ async def process_video_async(job_id: str) -> None:
54
  stream_queue,
55
  job.mission_spec, # Forward mission spec for relevance gating
56
  job.first_frame_gpt_results, # Avoid duplicate GPT call on frame 0
 
57
  )
58
  detection_path, detections_list = result_pkg
59
 
 
54
  stream_queue,
55
  job.mission_spec, # Forward mission spec for relevance gating
56
  job.first_frame_gpt_results, # Avoid duplicate GPT call on frame 0
57
+ job.first_frame_detections, # Reuse frame 0 detections (avoid re-detecting)
58
  )
59
  detection_path, detections_list = result_pkg
60