Spaces:

BiasLab2025
/

detection_base

Paused

Zhen Ye Claude Opus 4.6 (1M context) committed on Apr 13

Commit

9574811

1 Parent(s): 164d8b0

refactor: remove GPT/mission/relevance system, keep CV-only base

Strip all GPT threat assessment, mission parsing, relevance gating,
and enrichment logic to create a reusable detection base. All computer
vision functionality preserved: detectors, segmenters, depth estimation,
object tracking, multi-GPU pipeline, async jobs, and MJPEG streaming.

Deleted: utils/{gpt_reasoning,openai_client,threat_chat,relevance,
enrichment,mission_parser,schemas}.py
Removed: enable_gpt, mission_spec, first_frame_gpt_results params
from inference pipeline, jobs, and API endpoints.
Removed: /detect/analyze-frame, /reason/track, /chat/threat endpoints.
Removed: sentence-transformers, python-dotenv dependencies.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Files changed (12) hide show

app.py +20 -340
inference.py +8 -336
jobs/background.py +2 -8
jobs/models.py +0 -5
requirements.txt +0 -2
utils/enrichment.py +0 -122
utils/gpt_reasoning.py +0 -374
utils/mission_parser.py +0 -481
utils/openai_client.py +0 -80
utils/relevance.py +0 -141
utils/schemas.py +0 -115
utils/threat_chat.py +0 -154

app.py CHANGED Viewed

@@ -1,7 +1,4 @@
 import os
-from dotenv import load_dotenv
-load_dotenv()
 import logging
 # Fix: Set Hugging Face cache to writable location
@@ -39,8 +36,7 @@ import cv2
 import numpy as np
 from fastapi import BackgroundTasks, FastAPI, File, Form, HTTPException, UploadFile
 from fastapi.middleware.cors import CORSMiddleware
-from fastapi.responses import FileResponse, HTMLResponse, JSONResponse, RedirectResponse, StreamingResponse
-from fastapi.staticfiles import StaticFiles
 import uvicorn
 from inference import process_first_frame, run_inference, run_grounded_sam2_tracking
@@ -57,14 +53,6 @@ from jobs.storage import (
     get_job_storage,
     get_output_video_path,
 )
-from utils.gpt_reasoning import estimate_threat_gpt
-from utils.threat_chat import chat_about_threats
-from utils.relevance import evaluate_relevance
-from utils.enrichment import run_enrichment
-from utils.schemas import AssessmentStatus
-from models.segmenters.model_loader import get_segmenter_detector
-from utils.mission_parser import parse_mission_text, build_broad_queries, MissionParseError
 logging.basicConfig(level=logging.INFO)
 # Suppress noisy external libraries
@@ -72,77 +60,6 @@ logging.getLogger("httpx").setLevel(logging.WARNING)
 logging.getLogger("huggingface_hub").setLevel(logging.WARNING)
 logging.getLogger("transformers").setLevel(logging.WARNING)
-# GPT concurrency limiter — prevents thread exhaustion under load
-_GPT_SEMAPHORE = asyncio.Semaphore(int(os.environ.get("GPT_CONCURRENCY_LIMIT", "4")))
-async def _enrich_first_frame_gpt(
-    job_id: str,
-    frame: np.ndarray,
-    detections: list,
-    enable_gpt: bool,
-    mission_spec,
-) -> None:
-    """Fire-and-forget GPT enrichment for first-frame track cards.
-    Runs concurrently with the video pipeline so the user gets instant
-    first-frame preview (UNASSESSED), then track cards update once GPT
-    finishes (typically 2-5s later).
-    """
-    if not enable_gpt or not detections:
-        return
-    try:
-        # Non-LLM_EXTRACTED relevance filter runs BEFORE run_enrichment (FAST_PATH case)
-        if mission_spec and mission_spec.parse_mode != "LLM_EXTRACTED":
-            for d in detections:
-                decision = evaluate_relevance(d, mission_spec.relevance_criteria)
-                d["mission_relevant"] = decision.relevant
-                d["relevance_reason"] = decision.reason
-            filtered = [d for d in detections if d.get("mission_relevant", True)]
-            if not filtered:
-                for det in detections:
-                    det["assessment_status"] = AssessmentStatus.ASSESSED
-                get_job_storage().update(
-                    job_id,
-                    first_frame_detections=detections,
-                )
-                logging.info("All detections non-relevant for job %s; marked ASSESSED", job_id)
-                return
-        gpt_results = await asyncio.to_thread(
-            run_enrichment, 0, frame, detections, mission_spec,
-            job_id=job_id,
-        )
-        logging.info("Background GPT enrichment complete for job %s", job_id)
-        if not gpt_results:
-            # All detections filtered as not relevant
-            for det in detections:
-                det["assessment_status"] = AssessmentStatus.ASSESSED
-            get_job_storage().update(
-                job_id,
-                first_frame_detections=detections,
-            )
-            logging.info("All detections non-relevant for job %s; marked ASSESSED", job_id)
-            return
-        # Tag any remaining detections without an assessment status
-        for det in detections:
-            if "assessment_status" not in det:
-                det["assessment_status"] = AssessmentStatus.UNASSESSED
-        # Update stored job so frontend polls pick up GPT data
-        get_job_storage().update(
-            job_id,
-            first_frame_detections=detections,
-            first_frame_gpt_results=gpt_results,
-        )
-        logging.info("Updated first_frame_detections with GPT results for job %s", job_id)
-    except Exception:
-        logging.exception("Background GPT enrichment failed for job %s", job_id)
 async def _periodic_cleanup() -> None:
     while True:
         await asyncio.sleep(600)
@@ -168,26 +85,6 @@ app.add_middleware(
 )
-from fastapi import Request
-@app.middleware("http")
-async def add_no_cache_header(request: Request, call_next):
-    """Ensure frontend assets are not cached by the browser (important for HF Spaces updates)."""
-    response = await call_next(request)
-    # Apply to all static files and the root page
-    if request.url.path.startswith("/laser") or request.url.path == "/":
-        response.headers["Cache-Control"] = "no-cache, no-store, must-revalidate"
-        response.headers["Pragma"] = "no-cache"
-        response.headers["Expires"] = "0"
-    return response
-# Optional: serve the LaserPerception frontend from this backend.
-# The frontend files are now located in the 'frontend' directory.
-_FRONTEND_DIR = Path(__file__).with_name("frontend")
-if _FRONTEND_DIR.exists():
-    # Mount the entire frontend directory at /laser (legacy path) or /frontend
-    app.mount("/laser", StaticFiles(directory=_FRONTEND_DIR, html=True), name="laser")
 # Valid detection modes
 VALID_MODES = {"object_detection", "segmentation", "drone_detection"}
@@ -228,7 +125,11 @@ def _schedule_cleanup(background_tasks: BackgroundTasks, path: str) -> None:
     background_tasks.add_task(_cleanup)
-def _default_queries_for_mode(mode: str) -> list[str]:
     if mode == "segmentation":
         return ["object"]
     if mode == "drone_detection":
@@ -236,11 +137,17 @@ def _default_queries_for_mode(mode: str) -> list[str]:
     return ["person", "car", "truck", "motorcycle", "bicycle", "bus", "train", "airplane"]
 @app.get("/", response_class=HTMLResponse)
 async def demo_page():
-    """Redirect to LaserPerception app."""
-    # The main entry point is now index.html in the mounted directory
-    return RedirectResponse(url="/laser/index.html")
 @app.post("/detect")
@@ -252,10 +159,9 @@ async def detect_endpoint(
     detector: str = Form("yolo11"),
     segmenter: str = Form("GSAM2-L"),
     enable_depth: bool = Form(False),
-    enable_gpt: bool = Form(True),
 ):
     """
-    Main detection endpoint.
     Args:
         video: Video file to process
@@ -263,8 +169,7 @@ async def detect_endpoint(
         queries: Comma-separated object classes for object_detection mode
         detector: Model to use (yolo11, detr_resnet50, grounding_dino)
         segmenter: Segmentation model to use (GSAM2-S/B/L, YSAM2-S/B/L)
-        enable_depth: Whether to run legacy depth estimation (default: False)
-        drone_detection uses the dedicated drone_yolo model.
     Returns:
         - For object_detection: Processed video with bounding boxes
@@ -293,10 +198,7 @@ async def detect_endpoint(
         fd, output_path = tempfile.mkstemp(prefix="output_", suffix=".mp4", dir="/tmp")
         os.close(fd)
-        # Parse queries
-        query_list = [q.strip() for q in queries.split(",") if q.strip()]
-        if not query_list:
-            query_list = ["object"]
         try:
             output_path = run_grounded_sam2_tracking(
@@ -343,25 +245,11 @@ async def detect_endpoint(
     fd, output_path = tempfile.mkstemp(prefix="output_", suffix=".mp4", dir="/tmp")
     os.close(fd)
-    # Parse queries with mission awareness
     detector_name = "drone_yolo" if mode == "drone_detection" else detector
-    mission_spec = None
-    if queries.strip():
-        try:
-            mission_spec = parse_mission_text(queries.strip(), detector_name, video_path=input_path)
-            query_list = build_broad_queries(detector_name, mission_spec)
-        except MissionParseError as e:
-            raise HTTPException(status_code=422, detail=str(e))
-    else:
-        query_list = _default_queries_for_mode(mode)
-    if mode == "drone_detection" and not query_list:
-        query_list = ["drone"]
-    # Run inference
     try:
         # Determine depth estimator
         active_depth = "depth" if enable_depth else None
@@ -372,7 +260,6 @@ async def detect_endpoint(
             detector_name=detector_name,
             depth_estimator_name=active_depth,
             depth_scale=25.0,
-            enable_gpt=enable_gpt,
         )
     except ValueError as exc:
         logging.exception("Video processing failed.")
@@ -408,7 +295,6 @@ async def detect_async_endpoint(
     depth_estimator: str = Form("depth"),
     depth_scale: float = Form(25.0),
     enable_depth: bool = Form(False),
-    enable_gpt: bool = Form(True),
     step: int = Form(7),
 ):
     _ttfs_t0 = time.perf_counter()
@@ -440,49 +326,13 @@ async def detect_async_endpoint(
     logging.info("[TTFS:%s] +%.1fs upload_saved", job_id, time.perf_counter() - _ttfs_t0)
-    # --- Mission-Driven Query Parsing ---
-    mission_spec = None
-    mission_mode = "LEGACY"
     detector_name = detector
-    mission_detector = detector  # detector key used for mission query parsing
     if mode == "drone_detection":
         detector_name = "drone_yolo"
-        mission_detector = "drone_yolo"
     elif mode == "segmentation":
-        # Segmenter registry owns detector selection (GSAM2→GDINO, YSAM2→YOLO).
-        # detector_name=None so the job doesn't forward it (avoids duplicate kwarg).
-        try:
-            mission_detector = get_segmenter_detector(segmenter)
-        except ValueError as exc:
-            raise HTTPException(status_code=400, detail=str(exc))
         detector_name = None
-    if queries.strip():
-        try:
-            mission_spec = parse_mission_text(queries.strip(), mission_detector, video_path=str(input_path))
-            query_list = build_broad_queries(mission_detector, mission_spec)
-            mission_mode = "MISSION"
-            logging.info(
-                "Mission parsed: mode=%s classes=%s broad_queries=%s domain=%s(%s)",
-                mission_mode, mission_spec.object_classes, query_list,
-                mission_spec.domain, mission_spec.domain_source,
-            )
-        except MissionParseError as e:
-            raise HTTPException(
-                status_code=422,
-                detail=str(e),
-            )
-    else:
-        # LEGACY mode: no mission context, use defaults, disable GPT
-        query_list = _default_queries_for_mode(mode)
-        enable_gpt = False
-        mission_mode = "LEGACY"
-        logging.info(
-            "LEGACY mode: no mission text, defaults=%s, GPT disabled", query_list
-        )
-    logging.info("[TTFS:%s] +%.1fs mission_parsed", job_id, time.perf_counter() - _ttfs_t0)
     available_depth_estimators = set(list_depth_estimators())
     if depth_estimator not in available_depth_estimators:
@@ -508,8 +358,6 @@ async def detect_async_endpoint(
         )
         cv2.imwrite(str(first_frame_path), processed_frame)
         logging.info("[TTFS:%s] +%.1fs process_first_frame done", job_id, time.perf_counter() - _ttfs_t0)
-        # GPT and depth are now handled in the async pipeline (enrichment thread)
-        first_frame_gpt_results = None
     except Exception:
         logging.exception("First-frame processing failed.")
         shutil.rmtree(job_dir, ignore_errors=True)
@@ -530,26 +378,12 @@ async def detect_async_endpoint(
         depth_scale=float(depth_scale),
         depth_output_path=str(depth_output_path),
         first_frame_depth_path=str(first_frame_depth_path),
-        enable_gpt=enable_gpt,
-        mission_spec=mission_spec,
-        mission_mode=mission_mode,
-        first_frame_gpt_results=first_frame_gpt_results,
         step=step,
         ttfs_t0=_ttfs_t0,
     )
     get_job_storage().create(job)
     asyncio.create_task(process_video_async(job_id))
-    # Fire-and-forget: enrich first-frame detections with GPT in background.
-    # Runs for ALL modes including segmentation — first-frame detections from
-    # process_first_frame() already have stable track IDs (T01, T02, ...) and
-    # valid bboxes, so there's no reason to defer.  The GSAM2 writer's
-    # enrichment thread will see the cached results via first_frame_gpt_results
-    # in JobStorage and skip the duplicate call on frame 0.
-    asyncio.create_task(_enrich_first_frame_gpt(
-        job_id, processed_frame, detections, enable_gpt, mission_spec,
-    ))
     response_data = {
         "job_id": job_id,
         "first_frame_url": f"/detect/first-frame/{job_id}",
@@ -560,21 +394,8 @@ async def detect_async_endpoint(
         "stream_url": f"/detect/stream/{job_id}",
         "status": job.status.value,
         "first_frame_detections": detections,
-        "mission_mode": mission_mode,
     }
-    if mission_spec:
-        response_data["mission_spec"] = {
-            "object_classes": mission_spec.object_classes,
-            "mission_intent": mission_spec.mission_intent,
-            "domain": mission_spec.domain,
-            "domain_source": mission_spec.domain_source,
-            "parse_confidence": mission_spec.parse_confidence,
-            "parse_warnings": mission_spec.parse_warnings,
-            "context_phrases": mission_spec.context_phrases,
-            "stripped_modifiers": mission_spec.stripped_modifiers,
-        }
     return response_data
@@ -618,59 +439,6 @@ async def get_frame_tracks(job_id: str, frame_idx: int):
     return data or []
-@app.post("/detect/analyze-frame")
-async def analyze_frame(
-    image: UploadFile = File(...),
-    detections: str = Form(...),
-    job_id: str = Form(None),
-):
-    """Run GPT threat assessment on a single video frame."""
-    import json as json_module
-    from utils.gpt_reasoning import encode_frame_to_b64
-    dets = json_module.loads(detections)
-    # Look up mission_spec from stored job (if available)
-    mission_spec = None
-    if job_id:
-        job = get_job_storage().get(job_id)
-        if job:
-            mission_spec = job.mission_spec
-    # Decode uploaded image
-    image_bytes = await image.read()
-    nparr = np.frombuffer(image_bytes, np.uint8)
-    frame = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
-    if frame is None:
-        raise HTTPException(status_code=400, detail="Invalid image")
-    # Run GPT in thread pool (blocking OpenAI API call)
-    frame_b64 = encode_frame_to_b64(frame)
-    async with _GPT_SEMAPHORE:
-        gpt_results = await asyncio.to_thread(
-            estimate_threat_gpt,
-            detections=dets,
-            mission_spec=mission_spec,
-            image_b64=frame_b64,
-        )
-    # Merge GPT results into detection records
-    for d in dets:
-        oid = d.get("track_id") or d.get("id")
-        if oid and oid in gpt_results:
-            payload = gpt_results[oid]
-            d["gpt_raw"] = payload
-            d["assessment_status"] = payload.get("assessment_status", "ASSESSED")
-            d["threat_level_score"] = payload.get("threat_level_score", 0)
-            d["threat_classification"] = payload.get("threat_classification", "Unknown")
-            d["weapon_readiness"] = payload.get("weapon_readiness", "Unknown")
-            d["gpt_description"] = payload.get("gpt_description")
-            d["gpt_distance_m"] = payload.get("gpt_distance_m")
-            d["gpt_direction"] = payload.get("gpt_direction")
-    return dets
 @app.delete("/detect/job/{job_id}")
 async def cancel_job(job_id: str):
     """Cancel a running job."""
@@ -856,93 +624,6 @@ async def stream_video(job_id: str):
     )
-@app.post("/reason/track")
-async def reason_track(
-    frame: UploadFile = File(...),
-    tracks: str = Form(...)  # JSON string of tracks: [{"id": "T01", "bbox": [x,y,w,h], "label": "car"}, ...]
-):
-    """
-    Reason about specific tracks in a frame using GPT.
-    Returns distance and description for each object ID.
-    """
-    import json
-    try:
-        input_path = _save_upload_to_tmp(frame)
-    except Exception:
-        raise HTTPException(status_code=500, detail="Failed to save uploaded frame")
-    try:
-        track_list = json.loads(tracks)
-    except json.JSONDecodeError:
-        _safe_delete(input_path)
-        raise HTTPException(status_code=400, detail="Invalid tracks JSON")
-    # Run GPT estimation
-    # This is blocking, but that's expected for this endpoint structure.
-    # For high concurrency, might want to offload to threadpool or async wrapper.
-    try:
-        async with _GPT_SEMAPHORE:
-            results = await asyncio.to_thread(estimate_threat_gpt, input_path, track_list)
-        logging.info(f"GPT Output for Video Track Update:\n{results}")
-    except Exception as e:
-        logging.exception("GPT reasoning failed")
-        _safe_delete(input_path)
-        raise HTTPException(status_code=500, detail=str(e))
-    _safe_delete(input_path)
-    return results
-@app.post("/chat/threat")
-async def chat_threat_endpoint(
-    question: str = Form(...),
-    detections: str = Form(...),  # JSON string of current detections
-    mission_context: str = Form(""),  # Optional JSON string of mission spec
-):
-    """
-    Chat about detected threats using GPT.
-    Args:
-        question: User's question about the current threat situation.
-        detections: JSON string of detection list with threat analysis data.
-        mission_context: Optional JSON string of mission specification.
-    Returns:
-        GPT response about the threats.
-    """
-    import json as json_module
-    if not question.strip():
-        raise HTTPException(status_code=400, detail="Question cannot be empty.")
-    try:
-        detection_list = json_module.loads(detections)
-    except json_module.JSONDecodeError:
-        raise HTTPException(status_code=400, detail="Invalid detections JSON.")
-    if not isinstance(detection_list, list):
-        raise HTTPException(status_code=400, detail="Detections must be a list.")
-    # Parse optional mission context
-    mission_spec_dict = None
-    if mission_context.strip():
-        try:
-            mission_spec_dict = json_module.loads(mission_context)
-        except json_module.JSONDecodeError:
-            pass  # Non-critical, proceed without mission context
-    # Run chat in thread to avoid blocking (with concurrency limit)
-    try:
-        async with _GPT_SEMAPHORE:
-            response = await asyncio.to_thread(
-                chat_about_threats, question, detection_list, mission_spec_dict
-            )
-        return {"response": response}
-    except Exception as e:
-        logging.exception("Threat chat failed")
-        raise HTTPException(status_code=500, detail=str(e))
 @app.post("/benchmark")
 async def benchmark_endpoint(
     video: UploadFile = File(...),
@@ -990,7 +671,6 @@ async def benchmark_endpoint(
             query_list,
             segmenter_name=segmenter,
             step=step,
-            enable_gpt=False,
             _perf_metrics=metrics,
             _perf_lock=lock,
             num_maskmem=num_maskmem,

 import os
 import logging
 # Fix: Set Hugging Face cache to writable location
 import numpy as np
 from fastapi import BackgroundTasks, FastAPI, File, Form, HTTPException, UploadFile
 from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import FileResponse, HTMLResponse, JSONResponse, StreamingResponse
 import uvicorn
 from inference import process_first_frame, run_inference, run_grounded_sam2_tracking
     get_job_storage,
     get_output_video_path,
 )
 logging.basicConfig(level=logging.INFO)
 # Suppress noisy external libraries
 logging.getLogger("huggingface_hub").setLevel(logging.WARNING)
 logging.getLogger("transformers").setLevel(logging.WARNING)
 async def _periodic_cleanup() -> None:
     while True:
         await asyncio.sleep(600)
 )
 # Valid detection modes
 VALID_MODES = {"object_detection", "segmentation", "drone_detection"}
     background_tasks.add_task(_cleanup)
+def _parse_queries(raw: str, mode: str) -> list[str]:
+    """Parse comma-separated query string, falling back to mode defaults."""
+    parsed = [q.strip() for q in raw.split(",") if q.strip()]
+    if parsed:
+        return parsed
     if mode == "segmentation":
         return ["object"]
     if mode == "drone_detection":
     return ["person", "car", "truck", "motorcycle", "bicycle", "bus", "train", "airplane"]
+# Cache index.html at module load
+_INDEX_HTML_PATH = Path(__file__).with_name("index.html")
+_INDEX_HTML = _INDEX_HTML_PATH.read_text() if _INDEX_HTML_PATH.exists() else None
 @app.get("/", response_class=HTMLResponse)
 async def demo_page():
+    """Serve minimal detection UI."""
+    if _INDEX_HTML:
+        return HTMLResponse(_INDEX_HTML)
+    return HTMLResponse("<h1>Detection Base</h1><p>index.html not found</p>")
 @app.post("/detect")
     detector: str = Form("yolo11"),
     segmenter: str = Form("GSAM2-L"),
     enable_depth: bool = Form(False),
 ):
     """
+    Main detection endpoint (synchronous).
     Args:
         video: Video file to process
         queries: Comma-separated object classes for object_detection mode
         detector: Model to use (yolo11, detr_resnet50, grounding_dino)
         segmenter: Segmentation model to use (GSAM2-S/B/L, YSAM2-S/B/L)
+        enable_depth: Whether to run depth estimation (default: False)
     Returns:
         - For object_detection: Processed video with bounding boxes
         fd, output_path = tempfile.mkstemp(prefix="output_", suffix=".mp4", dir="/tmp")
         os.close(fd)
+        query_list = _parse_queries(queries, mode)
         try:
             output_path = run_grounded_sam2_tracking(
     fd, output_path = tempfile.mkstemp(prefix="output_", suffix=".mp4", dir="/tmp")
     os.close(fd)
     detector_name = "drone_yolo" if mode == "drone_detection" else detector
+    query_list = _parse_queries(queries, mode)
     try:
         # Determine depth estimator
         active_depth = "depth" if enable_depth else None
             detector_name=detector_name,
             depth_estimator_name=active_depth,
             depth_scale=25.0,
         )
     except ValueError as exc:
         logging.exception("Video processing failed.")
     depth_estimator: str = Form("depth"),
     depth_scale: float = Form(25.0),
     enable_depth: bool = Form(False),
     step: int = Form(7),
 ):
     _ttfs_t0 = time.perf_counter()
     logging.info("[TTFS:%s] +%.1fs upload_saved", job_id, time.perf_counter() - _ttfs_t0)
     detector_name = detector
     if mode == "drone_detection":
         detector_name = "drone_yolo"
     elif mode == "segmentation":
         detector_name = None
+    query_list = _parse_queries(queries, mode)
     available_depth_estimators = set(list_depth_estimators())
     if depth_estimator not in available_depth_estimators:
         )
         cv2.imwrite(str(first_frame_path), processed_frame)
         logging.info("[TTFS:%s] +%.1fs process_first_frame done", job_id, time.perf_counter() - _ttfs_t0)
     except Exception:
         logging.exception("First-frame processing failed.")
         shutil.rmtree(job_dir, ignore_errors=True)
         depth_scale=float(depth_scale),
         depth_output_path=str(depth_output_path),
         first_frame_depth_path=str(first_frame_depth_path),
         step=step,
         ttfs_t0=_ttfs_t0,
     )
     get_job_storage().create(job)
     asyncio.create_task(process_video_async(job_id))
     response_data = {
         "job_id": job_id,
         "first_frame_url": f"/detect/first-frame/{job_id}",
         "stream_url": f"/detect/stream/{job_id}",
         "status": job.status.value,
         "first_frame_detections": detections,
     }
     return response_data
     return data or []
 @app.delete("/detect/job/{job_id}")
 async def cancel_job(job_id: str):
     """Cancel a running job."""
     )
 @app.post("/benchmark")
 async def benchmark_endpoint(
     video: UploadFile = File(...),
             query_list,
             segmenter_name=segmenter,
             step=step,
             _perf_metrics=metrics,
             _perf_lock=lock,
             num_maskmem=num_maskmem,

inference.py CHANGED Viewed

@@ -21,12 +21,8 @@ from models.model_loader import load_detector, load_detector_on_device
 from models.segmenters.model_loader import load_segmenter, load_segmenter_on_device
 from models.depth_estimators.model_loader import load_depth_estimator, load_depth_estimator_on_device
 from utils.video import StreamingVideoWriter
-from utils.relevance import evaluate_relevance
-from utils.enrichment import run_enrichment
-from utils.schemas import AssessmentStatus
 from jobs.storage import set_track_data
 import tempfile
-import json as json_module
 class AsyncVideoReader:
@@ -301,9 +297,7 @@ class SpeedEstimator:
             dist_px = np.sqrt((cx1-cx2)**2 + (cy1-cy2)**2)
-            # Heuristic scale: Assume car is ~4m long? Or just arbitrary pixel scale
-            # If we had GPT distance, we could calibrate.
-            # For now, let's use a dummy scale: 50px = 1m (very rough)
             # Speed = (dist_px / 50) meters / (5 frames / 30 fps) seconds
             #       = (dist_px / 50) / (0.166) m/s
             #       = (dist_px * 0.12) m/s
@@ -403,7 +397,7 @@ def _attach_depth_metrics(
     depth_scale: float,  # No longer used for distance calculation
     estimator_instance: Optional[Any] = None,
 ) -> None:
-    """Attach relative depth values for visualization only. GPT handles distance estimation."""
     if not detections or (not depth_estimator_name and not estimator_instance):
         return
@@ -514,16 +508,7 @@ def infer_frame(
             except Exception:
                 logging.exception("Depth estimation failed for frame")
-        # Re-build display labels to include GPT distance if available
-        display_labels = []
-        for i, det in enumerate(detections):
-            label = det["label"]
-            if det.get("gpt_distance_m") is not None:
-                # Add GPT distance to label, e.g. "car 12m"
-                depth_str = f"{int(det['gpt_distance_m'])}m"
-                label = f"{label} {depth_str}"
-                logging.debug("Object '%s' at %s (bbox: %s)", label, depth_str, det['bbox'])
-            display_labels.append(label)
     except Exception:
         logging.exception("Inference failed for queries %s", text_queries)
@@ -537,15 +522,8 @@ def infer_frame(
     ), detections
-def _build_display_label(det):
-    """Build display label with GPT distance if available."""
-    label = det["label"]
-    if det.get("gpt_distance_m") is not None:
-        label = f"{label} {int(det['gpt_distance_m'])}m"
-    return label
 def _attach_depth_from_result(detections, depth_result, depth_scale):
-    """Attach relative depth values for visualization only. GPT handles distance estimation."""
     depth_map = depth_result.depth_map
     if depth_map is None or depth_map.size == 0: return
@@ -644,11 +622,8 @@ def process_first_frame(
 ) -> Tuple[np.ndarray, List[Dict[str, Any]]]:
     """Lightweight first-frame processing: detection + rendering only.
-    GPT, depth, and LLM relevance are handled later in the async pipeline
-    (writer enrichment thread), avoiding 2-8s synchronous startup delay.
     Returns:
-        (processed_frame, detections) — all detections tagged UNASSESSED.
     """
     frame, _, _, _ = extract_first_frame(video_path)
     if mode == "segmentation":
@@ -665,7 +640,6 @@ def process_first_frame(
                     "bbox": [int(c) for c in box],
                     "score": float(seg_result.scores[idx]) if seg_result.scores is not None and idx < len(seg_result.scores) else 1.0,
                     "track_id": f"T{idx + 1:02d}",
-                    "assessment_status": AssessmentStatus.UNASSESSED,
                 })
         return processed, detections
@@ -673,10 +647,6 @@ def process_first_frame(
         frame, queries, detector_name=detector_name
     )
-    # Tag all detections as unassessed — GPT runs later in enrichment thread
-    for det in detections:
-        det["assessment_status"] = AssessmentStatus.UNASSESSED
     return processed, detections
@@ -689,10 +659,7 @@ def run_inference(
     job_id: Optional[str] = None,
     depth_estimator_name: Optional[str] = None,
     depth_scale: float = 1.0,
-    enable_gpt: bool = True,
     stream_queue: Optional[Queue] = None,
-    mission_spec=None,  # Optional[MissionSpecification]
-    first_frame_gpt_results: Optional[Dict[str, Any]] = None,
     first_frame_detections: Optional[List[Dict[str, Any]]] = None,
 ) -> Tuple[str, List[List[Dict[str, Any]]]]:
@@ -769,8 +736,7 @@ def run_inference(
     # queue_in: (frame_idx, frame_data)
     # queue_out: (frame_idx, processed_frame, detections)
     queue_in = Queue(maxsize=16)
-    # Tuning for A10: buffer at least 32 frames per GPU (batch size)
-    # GPT Latency Buffer: GPT takes ~3s. At 30fps, that's 90 frames. We need to absorb this burst.
     queue_out_max = max(128, (len(detectors) if detectors else 1) * 32)
     queue_out = Queue(maxsize=queue_out_max)
@@ -948,32 +914,6 @@ def run_inference(
     # writer_finished = False
-    # --- GPT Enrichment Thread (non-blocking) ---
-    # Runs LLM relevance + GPT threat assessment off the writer's critical path.
-    gpt_enrichment_queue = Queue(maxsize=4)
-    _relevance_refined = Event()
-    def enrichment_thread_fn(tracker_ref):
-        """Dedicated thread for GPT/LLM calls. Receives work from writer, injects results via tracker."""
-        while True:
-            item = gpt_enrichment_queue.get()
-            if item is None:
-                break  # Sentinel — shutdown
-            frame_idx, frame_data, gpt_dets, ms = item
-            try:
-                gpt_res = run_enrichment(
-                    frame_idx, frame_data, gpt_dets, ms,
-                    first_frame_gpt_results=first_frame_gpt_results,
-                    job_id=job_id,
-                    relevance_refined_event=_relevance_refined,
-                )
-                if gpt_res:
-                    tracker_ref.inject_metadata(gpt_dets)
-                    logging.info("Enrichment: GPT results injected into tracker for frame %d", frame_idx)
-            except Exception as e:
-                logging.error("Enrichment thread failed for frame %d: %s", frame_idx, e)
     def writer_loop():
         nonlocal writer_finished
         next_idx = 0
@@ -982,11 +922,6 @@ def run_inference(
         # Initialize Tracker & Speed Estimator
         tracker = ByteTracker(frame_rate=fps)
         speed_est = SpeedEstimator(fps=fps)
-        gpt_submitted = False  # GPT enrichment submitted once for frame 0
-        # Start enrichment thread
-        enrich_thread = Thread(target=enrichment_thread_fn, args=(tracker,), daemon=True)
-        enrich_thread.start()
         try:
             with StreamingVideoWriter(output_video_path, fps, width, height) as writer:
@@ -1016,67 +951,11 @@ def run_inference(
                                          next_idx, pre_track_count, len(dets))
                         speed_est.estimate(dets)
-                        # --- RELEVANCE GATE (deterministic, fast — stays in writer) ---
-                        if mission_spec:
-                            if (mission_spec.parse_mode == "LLM_EXTRACTED"
-                                    and not _relevance_refined.is_set()):
-                                # LLM post-filter hasn't run yet — pass all through
-                                for d in dets:
-                                    d["mission_relevant"] = True
-                                    d["relevance_reason"] = "pending_llm_postfilter"
-                                gpt_dets = dets
-                            else:
-                                # Normal deterministic gate (with refined or FAST_PATH classes)
-                                for d in dets:
-                                    decision = evaluate_relevance(d, mission_spec.relevance_criteria)
-                                    d["mission_relevant"] = decision.relevant
-                                    d["relevance_reason"] = decision.reason
-                                    if not decision.relevant:
-                                        logging.info(
-                                            json_module.dumps({
-                                                "event": "relevance_decision",
-                                                "track_id": d.get("track_id"),
-                                                "label": d.get("label"),
-                                                "relevant": False,
-                                                "reason": decision.reason,
-                                                "required_classes": mission_spec.relevance_criteria.required_classes,
-                                                "frame": next_idx,
-                                            })
-                                        )
-                                gpt_dets = [d for d in dets if d.get("mission_relevant", True)]
-                        else:
-                            for d in dets:
-                                d["mission_relevant"] = None
-                            gpt_dets = dets
-                        # --- GPT ENRICHMENT (non-blocking, offloaded to enrichment thread) ---
-                        if enable_gpt and gpt_dets and not gpt_submitted:
-                            # Tag as pending — enrichment thread will update to ASSESSED later
-                            for d in gpt_dets:
-                                d["assessment_status"] = AssessmentStatus.PENDING_GPT
-                            try:
-                                gpt_enrichment_queue.put(
-                                    (next_idx, p_frame.copy(), gpt_dets, mission_spec),
-                                    timeout=1.0,
-                                )
-                                gpt_submitted = True
-                                logging.info("Writer: offloaded GPT enrichment for frame %d", next_idx)
-                            except Full:
-                                logging.warning("GPT enrichment queue full, skipping frame 0 GPT")
-                        # Tag unassessed detections (INV-6)
-                        for d in dets:
-                            if "assessment_status" not in d:
-                                d["assessment_status"] = AssessmentStatus.UNASSESSED
                         # --- RENDER BOXES & OVERLAYS ---
                         if dets:
                             display_boxes = np.array([d['bbox'] for d in dets])
                             display_labels = []
                             for d in dets:
-                                if d.get("mission_relevant") is False:
-                                    display_labels.append("")
-                                    continue
                                 lbl = d.get('label', 'obj')
                                 display_labels.append(lbl)
@@ -1131,12 +1010,6 @@ def run_inference(
             logging.exception("Writer loop failed")
         finally:
             logging.info("Writer loop finished. Wrote %d frames (target %d)", next_idx, total_frames)
-            # Shut down enrichment thread
-            try:
-                gpt_enrichment_queue.put(None, timeout=5.0)
-                enrich_thread.join(timeout=30)
-            except Exception:
-                logging.warning("Enrichment thread shutdown timed out")
             writer_finished = True
     writer_thread = Thread(target=writer_loop, daemon=True)
@@ -1213,8 +1086,7 @@ def _gsam2_render_frame(
 ) -> np.ndarray:
     """Render a single GSAM2 tracking frame (masks + boxes). CPU-only.
-    When *masks_only* is True, skip box rendering so the writer thread can
-    draw boxes later with enriched (GPT) labels.
     """
     if frame_store is not None:
         frame = frame_store.get_bgr(frame_idx).copy()  # .copy() — render mutates
@@ -1274,9 +1146,6 @@ def run_grounded_sam2_tracking(
     job_id: Optional[str] = None,
     stream_queue: Optional[Queue] = None,
     step: int = 20,
-    enable_gpt: bool = False,
-    mission_spec=None,  # Optional[MissionSpecification]
-    first_frame_gpt_results: Optional[Dict[str, Any]] = None,
     _perf_metrics: Optional[Dict[str, float]] = None,
     _perf_lock=None,
     num_maskmem: Optional[int] = None,
@@ -1376,7 +1245,6 @@ def run_grounded_sam2_tracking(
                     frm = _gsam2_render_frame(
                         frame_dir, frame_names, fidx, fobjs,
                         height, width,
-                        masks_only=enable_gpt,
                         frame_store=frame_store,
                     )
@@ -1387,7 +1255,7 @@ def run_grounded_sam2_tracking(
                         else:
                             _perf_metrics["render_total_ms"] += _r_ms
-                    payload = (fidx, frm, fobjs) if enable_gpt else (fidx, frm, {})
                     while True:
                         try:
                             render_out.put(payload, timeout=1.0)
@@ -1410,92 +1278,6 @@ def run_grounded_sam2_tracking(
         for t in r_workers:
             t.start()
-        # --- ObjectInfo → detection dict adapter ---
-        def _objectinfo_to_dets(frame_objects_dict):
-            dets = []
-            for obj_id, info in frame_objects_dict.items():
-                dets.append({
-                    "label": info.class_name,
-                    "bbox": [info.x1, info.y1, info.x2, info.y2],
-                    "score": 1.0,
-                    "track_id": f"T{obj_id:02d}",
-                    "instance_id": obj_id,
-                })
-            return dets
-        # --- GPT enrichment thread (when enabled) ---
-        gpt_enrichment_queue: Queue = Queue(maxsize=4)
-        gpt_data_by_track: Dict[str, Dict] = {}
-        gpt_data_lock = RLock()
-        _relevance_refined = Event()
-        def _gsam2_enrichment_thread_fn():
-            while True:
-                item = gpt_enrichment_queue.get()
-                if item is None:
-                    break
-                frame_idx, frame_data, gpt_dets, ms = item
-                try:
-                    gpt_res = run_enrichment(
-                        frame_idx, frame_data, gpt_dets, ms,
-                        first_frame_gpt_results=first_frame_gpt_results,
-                        job_id=job_id,
-                        relevance_refined_event=_relevance_refined,
-                    )
-                    # GSAM2-specific: store results in per-track dict and persist to job storage
-                    if gpt_res:
-                        for d in gpt_dets:
-                            tid = d.get("track_id")
-                            if tid and tid in gpt_res:
-                                merged = dict(gpt_res[tid])
-                                merged["gpt_raw"] = gpt_res[tid]
-                                merged["assessment_frame_index"] = frame_idx
-                                merged["assessment_status"] = merged.get(
-                                    "assessment_status", AssessmentStatus.ASSESSED
-                                )
-                                with gpt_data_lock:
-                                    gpt_data_by_track[tid] = merged
-                        logging.info("GSAM2 enrichment: GPT results stored for %d tracks", len(gpt_data_by_track))
-                        # Persist GPT-enriched detections to job storage so
-                        # frontend polling (/detect/status) picks them up.
-                        if job_id:
-                            try:
-                                from jobs.storage import get_job_storage as _gjs
-                                _st = _gjs().get(job_id)
-                                if _st and _st.first_frame_detections:
-                                    for det in _st.first_frame_detections:
-                                        tid = det.get("track_id")
-                                        with gpt_data_lock:
-                                            payload = gpt_data_by_track.get(tid)
-                                        if payload:
-                                            det.update(payload)
-                                        # Also sync relevance from gpt_dets
-                                        src = next((d for d in gpt_dets if d.get("track_id") == tid), None)
-                                        if src:
-                                            if "mission_relevant" in src:
-                                                det["mission_relevant"] = src["mission_relevant"]
-                                            if "relevance_reason" in src:
-                                                det["relevance_reason"] = src["relevance_reason"]
-                                    from jobs.storage import get_job_storage as _gjs2
-                                    _gjs2().update(
-                                        job_id,
-                                        first_frame_detections=_st.first_frame_detections,
-                                        first_frame_gpt_results=gpt_res,
-                                    )
-                                    logging.info(
-                                        "GSAM2 enrichment: updated first_frame_detections in job storage for %s",
-                                        job_id,
-                                    )
-                            except Exception:
-                                logging.exception(
-                                    "GSAM2 enrichment: failed to update job storage for %s", job_id
-                                )
-                except Exception as e:
-                    logging.error("GSAM2 enrichment thread failed for frame %d: %s", frame_idx, e)
         # Shared streaming state (publisher ↔ writer)
         _stream_deque: collections.deque = collections.deque(maxlen=200)
         _stream_lock = RLock()
@@ -1508,15 +1290,6 @@ def run_grounded_sam2_tracking(
             buf: Dict[int, Tuple] = {}
             # Per-track bbox history (replaces ByteTracker for GSAM2)
-            track_history: Dict[int, List] = {}
-            speed_est = SpeedEstimator(fps=fps) if enable_gpt else None
-            gpt_submitted = False
-            # Start enrichment thread when GPT enabled
-            enrich_thread = None
-            if enable_gpt:
-                enrich_thread = Thread(target=_gsam2_enrichment_thread_fn, daemon=True)
-                enrich_thread.start()
             try:
                 with StreamingVideoWriter(
@@ -1538,100 +1311,6 @@ def run_grounded_sam2_tracking(
                             frm, fobjs = buf.pop(next_idx)
-                            # --- GPT enrichment path ---
-                            if enable_gpt and fobjs:
-                                dets = _objectinfo_to_dets(fobjs)
-                                # Maintain per-track bbox history (30-frame window)
-                                for det in dets:
-                                    iid = det["instance_id"]
-                                    track_history.setdefault(iid, []).append(det["bbox"])
-                                    if len(track_history[iid]) > 30:
-                                        track_history[iid].pop(0)
-                                    # Store an immutable per-frame snapshot.
-                                    det["history"] = list(track_history[iid])
-                                # Speed estimation
-                                if speed_est:
-                                    speed_est.estimate(dets)
-                                # Relevance gate
-                                if mission_spec:
-                                    if (mission_spec.parse_mode == "LLM_EXTRACTED"
-                                            and not _relevance_refined.is_set()):
-                                        for d in dets:
-                                            d["mission_relevant"] = True
-                                            d["relevance_reason"] = "pending_llm_postfilter"
-                                        gpt_dets = dets
-                                    else:
-                                        for d in dets:
-                                            decision = evaluate_relevance(d, mission_spec.relevance_criteria)
-                                            d["mission_relevant"] = decision.relevant
-                                            d["relevance_reason"] = decision.reason
-                                        gpt_dets = [d for d in dets if d.get("mission_relevant", True)]
-                                else:
-                                    for d in dets:
-                                        d["mission_relevant"] = None
-                                    gpt_dets = dets
-                                # GPT enrichment (one-shot, first frame with detections)
-                                if gpt_dets and not gpt_submitted:
-                                    for d in gpt_dets:
-                                        d["assessment_status"] = AssessmentStatus.PENDING_GPT
-                                    try:
-                                        gpt_enrichment_queue.put(
-                                            (
-                                                next_idx,
-                                                frm.copy(),
-                                                copy.deepcopy(gpt_dets),
-                                                mission_spec,
-                                            ),
-                                            timeout=1.0,
-                                        )
-                                        gpt_submitted = True
-                                        logging.info("GSAM2 writer: offloaded GPT enrichment for frame %d", next_idx)
-                                    except Full:
-                                        logging.warning("GSAM2 GPT enrichment queue full, skipping")
-                                # Merge persistent GPT data
-                                for det in dets:
-                                    tid = det["track_id"]
-                                    with gpt_data_lock:
-                                        gpt_payload = gpt_data_by_track.get(tid)
-                                    if gpt_payload:
-                                        det.update(gpt_payload)
-                                        det["assessment_status"] = AssessmentStatus.ASSESSED
-                                    elif "assessment_status" not in det:
-                                        det["assessment_status"] = AssessmentStatus.UNASSESSED
-                                # Build enriched display labels
-                                display_labels = []
-                                for d in dets:
-                                    if d.get("mission_relevant") is False:
-                                        display_labels.append("")
-                                        continue
-                                    lbl = d.get("label", "obj")
-                                    if d.get("gpt_distance_m") is not None:
-                                        try:
-                                            lbl = f"{lbl} {int(float(d['gpt_distance_m']))}m"
-                                        except (TypeError, ValueError):
-                                            pass
-                                    display_labels.append(lbl)
-                                # Draw boxes on mask-rendered frame
-                                if dets:
-                                    boxes = np.array([d["bbox"] for d in dets])
-                                    frm = draw_boxes(frm, boxes, label_names=display_labels)
-                                # Store tracks for frontend
-                                if job_id:
-                                    set_track_data(job_id, next_idx, copy.deepcopy(dets))
-                            elif enable_gpt:
-                                # No objects this frame — still store empty track data
-                                if job_id:
-                                    set_track_data(job_id, next_idx, [])
                             if _perf_metrics is not None:
                                 _t_w = time.perf_counter()
@@ -1668,13 +1347,6 @@ def run_grounded_sam2_tracking(
             finally:
                 render_done = True
                 _stream_writer_done.set()
-                # Shut down enrichment thread
-                if enrich_thread:
-                    try:
-                        gpt_enrichment_queue.put(None, timeout=5.0)
-                        enrich_thread.join(timeout=30)
-                    except Exception:
-                        logging.warning("GSAM2 enrichment thread shutdown timed out")
         def _stream_publisher_thread():
             """Adaptive-rate publisher: reads from _stream_deque, publishes at measured pace."""

 from models.segmenters.model_loader import load_segmenter, load_segmenter_on_device
 from models.depth_estimators.model_loader import load_depth_estimator, load_depth_estimator_on_device
 from utils.video import StreamingVideoWriter
 from jobs.storage import set_track_data
 import tempfile
 class AsyncVideoReader:
             dist_px = np.sqrt((cx1-cx2)**2 + (cy1-cy2)**2)
+            # Heuristic scale: 50px = 1m (very rough)
             # Speed = (dist_px / 50) meters / (5 frames / 30 fps) seconds
             #       = (dist_px / 50) / (0.166) m/s
             #       = (dist_px * 0.12) m/s
     depth_scale: float,  # No longer used for distance calculation
     estimator_instance: Optional[Any] = None,
 ) -> None:
+    """Attach relative depth values to detection dicts for visualization."""
     if not detections or (not depth_estimator_name and not estimator_instance):
         return
             except Exception:
                 logging.exception("Depth estimation failed for frame")
+        display_labels = [det["label"] for det in detections]
     except Exception:
         logging.exception("Inference failed for queries %s", text_queries)
     ), detections
 def _attach_depth_from_result(detections, depth_result, depth_scale):
+    """Attach relative depth values to detection dicts for visualization."""
     depth_map = depth_result.depth_map
     if depth_map is None or depth_map.size == 0: return
 ) -> Tuple[np.ndarray, List[Dict[str, Any]]]:
     """Lightweight first-frame processing: detection + rendering only.
     Returns:
+        (processed_frame, detections)
     """
     frame, _, _, _ = extract_first_frame(video_path)
     if mode == "segmentation":
                     "bbox": [int(c) for c in box],
                     "score": float(seg_result.scores[idx]) if seg_result.scores is not None and idx < len(seg_result.scores) else 1.0,
                     "track_id": f"T{idx + 1:02d}",
                 })
         return processed, detections
         frame, queries, detector_name=detector_name
     )
     return processed, detections
     job_id: Optional[str] = None,
     depth_estimator_name: Optional[str] = None,
     depth_scale: float = 1.0,
     stream_queue: Optional[Queue] = None,
     first_frame_detections: Optional[List[Dict[str, Any]]] = None,
 ) -> Tuple[str, List[List[Dict[str, Any]]]]:
     # queue_in: (frame_idx, frame_data)
     # queue_out: (frame_idx, processed_frame, detections)
     queue_in = Queue(maxsize=16)
+    # Buffer at least 32 frames per GPU for pipeline overlap
     queue_out_max = max(128, (len(detectors) if detectors else 1) * 32)
     queue_out = Queue(maxsize=queue_out_max)
     # writer_finished = False
     def writer_loop():
         nonlocal writer_finished
         next_idx = 0
         # Initialize Tracker & Speed Estimator
         tracker = ByteTracker(frame_rate=fps)
         speed_est = SpeedEstimator(fps=fps)
         try:
             with StreamingVideoWriter(output_video_path, fps, width, height) as writer:
                                          next_idx, pre_track_count, len(dets))
                         speed_est.estimate(dets)
                         # --- RENDER BOXES & OVERLAYS ---
                         if dets:
                             display_boxes = np.array([d['bbox'] for d in dets])
                             display_labels = []
                             for d in dets:
                                 lbl = d.get('label', 'obj')
                                 display_labels.append(lbl)
             logging.exception("Writer loop failed")
         finally:
             logging.info("Writer loop finished. Wrote %d frames (target %d)", next_idx, total_frames)
             writer_finished = True
     writer_thread = Thread(target=writer_loop, daemon=True)
 ) -> np.ndarray:
     """Render a single GSAM2 tracking frame (masks + boxes). CPU-only.
+    When *masks_only* is True, skip box rendering.
     """
     if frame_store is not None:
         frame = frame_store.get_bgr(frame_idx).copy()  # .copy() — render mutates
     job_id: Optional[str] = None,
     stream_queue: Optional[Queue] = None,
     step: int = 20,
     _perf_metrics: Optional[Dict[str, float]] = None,
     _perf_lock=None,
     num_maskmem: Optional[int] = None,
                     frm = _gsam2_render_frame(
                         frame_dir, frame_names, fidx, fobjs,
                         height, width,
                         frame_store=frame_store,
                     )
                         else:
                             _perf_metrics["render_total_ms"] += _r_ms
+                    payload = (fidx, frm, {})
                     while True:
                         try:
                             render_out.put(payload, timeout=1.0)
         for t in r_workers:
             t.start()
         # Shared streaming state (publisher ↔ writer)
         _stream_deque: collections.deque = collections.deque(maxlen=200)
         _stream_lock = RLock()
             buf: Dict[int, Tuple] = {}
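             # NOTE: stale comment — the per-track bbox history (which replaced ByteTracker for GSAM2) was removed along with the GPT enrichment path; nothing is tracked here now.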
             try:
                 with StreamingVideoWriter(
                             frm, fobjs = buf.pop(next_idx)
                             if _perf_metrics is not None:
                                 _t_w = time.perf_counter()
             finally:
                 render_done = True
                 _stream_writer_done.set()
         def _stream_publisher_thread():
             """Adaptive-rate publisher: reads from _stream_deque, publishes at measured pace."""

jobs/background.py CHANGED Viewed

@@ -35,9 +35,6 @@ async def process_video_async(job_id: str) -> None:
                 job_id=job_id,
                 stream_queue=stream_queue,
                 step=job.step,
-                enable_gpt=job.enable_gpt,
-                mission_spec=job.mission_spec,
-                first_frame_gpt_results=job.first_frame_gpt_results,
                 num_maskmem=7,
                 detector_name=job.detector_name,
                 _ttfs_t0=job.ttfs_t0,
@@ -53,13 +50,10 @@ async def process_video_async(job_id: str) -> None:
                 None,
                 job.detector_name,
                 job_id,
-                job.depth_estimator_name, # Pass depth estimator to trigger unified loop
                 job.depth_scale,
-                job.enable_gpt,
                 stream_queue,
-                job.mission_spec,  # Forward mission spec for relevance gating
-                job.first_frame_gpt_results,  # Avoid duplicate GPT call on frame 0
-                job.first_frame_detections,  # Reuse frame 0 detections (avoid re-detecting)
             )
             detection_path, detections_list = result_pkg

                 job_id=job_id,
                 stream_queue=stream_queue,
                 step=job.step,
                 num_maskmem=7,
                 detector_name=job.detector_name,
                 _ttfs_t0=job.ttfs_t0,
                 None,
                 job.detector_name,
                 job_id,
+                job.depth_estimator_name,
                 job.depth_scale,
                 stream_queue,
+                job.first_frame_detections,
             )
             detection_path, detections_list = result_pkg

jobs/models.py CHANGED Viewed

@@ -33,10 +33,5 @@ class JobInfo:
     first_frame_depth_path: Optional[str] = None
     partial_success: bool = False  # True if one component failed but job completed
     depth_error: Optional[str] = None  # Error message if depth failed
-    enable_gpt: bool = True  # Whether to use GPT for distance estimation
-    # Mission specification (None = LEGACY mode)
-    mission_spec: Optional[Any] = None  # utils.schemas.MissionSpecification
-    mission_mode: str = "LEGACY"  # "MISSION" or "LEGACY"
-    first_frame_gpt_results: Optional[Dict[str, Any]] = None  # Cached GPT results from process_first_frame
     step: int = 7  # Segmentation keyframe step (matches num_maskmem)
     ttfs_t0: Optional[float] = None  # TTFS anchor: time.perf_counter() at endpoint entry

     first_frame_depth_path: Optional[str] = None
     partial_success: bool = False  # True if one component failed but job completed
     depth_error: Optional[str] = None  # Error message if depth failed
     step: int = 7  # Segmentation keyframe step (matches num_maskmem)
     ttfs_t0: Optional[float] = None  # TTFS anchor: time.perf_counter() at endpoint entry

requirements.txt CHANGED Viewed

@@ -7,9 +7,7 @@ python-multipart
 pillow
 huggingface-hub
 ultralytics
-python-dotenv
 einops
-sentence-transformers
 SAM-2 @ git+https://github.com/facebookresearch/sam2.git
 hydra-core>=1.3.2
 iopath>=0.1.10

 pillow
 huggingface-hub
 ultralytics
 einops
 SAM-2 @ git+https://github.com/facebookresearch/sam2.git
 hydra-core>=1.3.2
 iopath>=0.1.10

utils/enrichment.py DELETED Viewed

@@ -1,122 +0,0 @@
-"""
-Shared enrichment workflow — single implementation of the 5-step GPT enrichment
-pipeline used by inference.py (detection + GSAM2) and app.py (first-frame).
-Consolidates duplicated logic from:
-  - inference.py enrichment_thread_fn
-  - inference.py _gsam2_enrichment_thread_fn
-  - app.py _enrich_first_frame_gpt
-"""
-import logging
-from threading import Event
-from typing import Any, Dict, List, Optional
-from utils.gpt_reasoning import estimate_threat_gpt, encode_frame_to_b64
-from utils.relevance import evaluate_relevance, evaluate_relevance_llm
-from utils.schemas import AssessmentStatus
-logger = logging.getLogger(__name__)
-def run_enrichment(
-    frame_idx: int,
-    frame_data,
-    detections: List[Dict[str, Any]],
-    mission_spec,
-    *,
-    first_frame_gpt_results: Optional[Dict] = None,
-    job_id: Optional[str] = None,
-    relevance_refined_event: Optional[Event] = None,
-) -> Optional[Dict[str, Any]]:
-    """Run the shared enrichment workflow (LLM post-filter + GPT threat assessment).
-    Steps:
-        1. LLM post-filter via evaluate_relevance_llm() (if LLM_EXTRACTED mode)
-        2. Signal relevance_refined_event (if provided)
-        3. Check cached GPT results (parameter or JobStorage fallback)
-        4. Call estimate_threat_gpt() if no cache
-        5. Merge results into detections by track_id
-    Args:
-        frame_idx: Index of the frame being enriched.
-        frame_data: OpenCV BGR frame (numpy array).
-        detections: Mutable list of detection dicts to enrich in-place.
-        mission_spec: Optional MissionSpecification.
-        first_frame_gpt_results: Pre-computed GPT results (cache hit).
-        job_id: Job identifier for JobStorage fallback cache lookup.
-        relevance_refined_event: threading.Event to signal when LLM post-filter completes.
-    Returns:
-        GPT results dict (object_id -> assessment), or None if all detections
-        were filtered out.
-    """
-    gpt_dets = detections
-    # --- Step 1: LLM post-filter (LLM_EXTRACTED mode) ---
-    if mission_spec and mission_spec.parse_mode == "LLM_EXTRACTED":
-        unique_labels = list({
-            d.get("label", "").lower()
-            for d in gpt_dets if d.get("label")
-        })
-        relevant_labels = evaluate_relevance_llm(
-            unique_labels, mission_spec.operator_text
-        )
-        mission_spec.relevance_criteria.required_classes = list(relevant_labels)
-        # --- Step 2: Signal writer loop ---
-        if relevance_refined_event is not None:
-            relevance_refined_event.set()
-        logger.info(
-            "Enrichment: LLM post-filter applied on frame %d: relevant=%s",
-            frame_idx, relevant_labels,
-        )
-        # Re-filter with refined classes
-        for d in gpt_dets:
-            decision = evaluate_relevance(d, mission_spec.relevance_criteria)
-            d["mission_relevant"] = decision.relevant
-        gpt_dets = [d for d in gpt_dets if d.get("mission_relevant", True)]
-    elif relevance_refined_event is not None:
-        # Non-LLM mode: signal immediately so writer doesn't block
-        relevance_refined_event.set()
-    if not gpt_dets:
-        return None
-    # --- Step 3: Check cached GPT results ---
-    cached_gpt = first_frame_gpt_results
-    if not cached_gpt and job_id:
-        try:
-            from jobs.storage import get_job_storage as _gjs
-            _job = _gjs().get(job_id)
-            if _job and _job.first_frame_gpt_results:
-                cached_gpt = _job.first_frame_gpt_results
-        except Exception:
-            pass
-    # --- Step 4: Call GPT if no cache ---
-    if cached_gpt:
-        logger.info("Enrichment: re-using cached GPT results for frame %d", frame_idx)
-        gpt_res = cached_gpt
-    else:
-        logger.info("Enrichment: running GPT estimation for frame %d...", frame_idx)
-        frame_b64 = encode_frame_to_b64(frame_data)
-        gpt_res = estimate_threat_gpt(
-            detections=gpt_dets, mission_spec=mission_spec,
-            image_b64=frame_b64,
-        )
-    # --- Step 5: Merge results into detections by track_id ---
-    for d in gpt_dets:
-        oid = d.get("track_id")
-        if oid and oid in gpt_res:
-            gpt_payload = gpt_res[oid]
-            d.update(gpt_payload)
-            d["gpt_raw"] = gpt_payload
-            d["assessment_frame_index"] = frame_idx
-            d["assessment_status"] = gpt_payload.get(
-                "assessment_status", AssessmentStatus.ASSESSED
-            )
-    return gpt_res

utils/gpt_reasoning.py DELETED Viewed

@@ -1,374 +0,0 @@
-import re
-import json
-import base64
-import logging
-from typing import List, Dict, Any, Optional
-from utils.schemas import AssessmentStatus
-from utils.openai_client import chat_completion, extract_content, get_api_key
-logger = logging.getLogger(__name__)
-def encode_image(image_path: str) -> str:
-    with open(image_path, "rb") as image_file:
-        return base64.b64encode(image_file.read()).decode('utf-8')
-def encode_frame_to_b64(frame, quality=None) -> str:
-    """Encode an OpenCV BGR frame to a base64 JPEG string in memory (no disk I/O).
-    Args:
-        frame: OpenCV BGR numpy array.
-        quality: Optional JPEG quality (1-100). Uses OpenCV default if None.
-    """
-    import cv2
-    params = [int(cv2.IMWRITE_JPEG_QUALITY), quality] if quality is not None else None
-    success, buf = cv2.imencode('.jpg', frame, params) if params else cv2.imencode('.jpg', frame)
-    if not success:
-        raise ValueError("Failed to encode frame to JPEG")
-    return base64.b64encode(buf.tobytes()).decode('utf-8')
-_DOMAIN_ROLES = {
-    "NAVAL": "Naval Intelligence Officer and Maritime Threat Analyst",
-    "GROUND": "Ground Surveillance Intelligence Officer",
-    "AERIAL": "Air Surveillance Intelligence Officer",
-    "URBAN": "Urban Surveillance Intelligence Officer",
-    "GENERIC": "Tactical Surveillance Analyst",
-}
-_HUMAN_LABEL_HINTS = frozenset({
-    "person", "people", "human", "pedestrian",
-    "man", "woman", "boy", "girl", "child",
-    "civilian", "soldier", "infantry", "troop", "trooper",
-})
-def _is_human_label(label: str) -> bool:
-    label_l = (label or "").lower().strip()
-    if not label_l:
-        return False
-    parts = [p for p in re.split(r"[^a-z0-9]+", label_l) if p]
-    return any(part in _HUMAN_LABEL_HINTS for part in parts)
-def _build_status_fallback(
-    object_ids: List[str],
-    status: str,
-    reason: str,
-) -> Dict[str, Dict[str, Any]]:
-    return {
-        obj_id: {
-            "assessment_status": status,
-            "gpt_reason": reason,
-        }
-        for obj_id in object_ids
-    }
-_UNIVERSAL_SCHEMA = (
-    "RESPONSE SCHEMA (JSON):\n"
-    "{\n"
-    "  \"objects\": {\n"
-    "    \"T01\": {\n"
-    "      \"object_type\": \"string (broad category, e.g. Warship, APC, Sedan, Person)\",\n"
-    "      \"size\": \"string (e.g. Large, Medium, Small, ~50m length)\",\n"
-    "      \"visible_weapons\": [\"string\"],\n"
-    "      \"weapon_readiness\": \"string (e.g. Stowed/PEACE, Trained/Aiming, Firing/HOSTILE, Unknown)\",\n"
-    "      \"motion_status\": \"string (e.g. Stationary, Moving Slow, Moving Fast, Hovering)\",\n"
-    "      \"range_estimate\": \"string (e.g. ~500m, ~2NM, ~1km)\",\n"
-    "      \"bearing\": \"string (e.g. 12 o'clock, NNE, 045°)\",\n"
-    "      \"threat_level\": int (1-10, 1=Benign, 10=Imminent Attack),\n"
-    "      \"threat_classification\": \"Friendly\" | \"Neutral\" | \"Suspect\" | \"Hostile\",\n"
-    "      \"tactical_intent\": \"string (e.g. Transit, Patrol, Attack Profile)\",\n"
-    "      \"dynamic_features\": [\n"
-    "        {\"key\": \"string (domain-specific observation name)\", \"value\": \"string\"}\n"
-    "      ]  // up to 5 extra observations relevant to the domain\n"
-    "    }\n"
-    "  }\n"
-    "}\n"
-)
-def _parse_range_to_meters(range_text: str) -> Optional[float]:
-    """Convert a free-text range string to meters.
-    Supports patterns like '~500m', '~2NM', '~1.5km', '500 meters', '2 nautical miles'.
-    Returns None if the string cannot be parsed.
-    """
-    if not range_text or range_text == "Unknown":
-        return None
-    text = range_text.strip().lstrip("~").strip()
-    # Try NM / nautical miles
-    m = re.match(r"([0-9]*\.?[0-9]+)\s*(NM|nm|nautical\s*miles?)", text)
-    if m:
-        return float(m.group(1)) * 1852.0
-    # Try km / kilometers
-    m = re.match(r"([0-9]*\.?[0-9]+)\s*(km|kilometers?|kilometres?)", text, re.IGNORECASE)
-    if m:
-        return float(m.group(1)) * 1000.0
-    # Try meters (default)
-    m = re.match(r"([0-9]*\.?[0-9]+)\s*(m|meters?|metres?)?$", text, re.IGNORECASE)
-    if m:
-        return float(m.group(1))
-    return None
-def _build_domain_system_prompt(domain: str, mission_spec=None) -> str:
-    """Build a universal system prompt with domain-appropriate role."""
-    # Mission context block (injected regardless of domain)
-    mission_context = ""
-    if mission_spec:
-        mission_context = (
-            "\n\nMISSION CONTEXT:\n"
-            f"- Operator Intent: {mission_spec.mission_intent}\n"
-            f"- Domain: {mission_spec.domain}\n"
-            f"- Target Classes: {', '.join(mission_spec.object_classes)}\n"
-        )
-        if mission_spec.context_phrases:
-            mission_context += f"- Situational Context: {'; '.join(mission_spec.context_phrases)}\n"
-        if mission_spec.stripped_modifiers:
-            mission_context += f"- Operator Modifiers (stripped): {', '.join(mission_spec.stripped_modifiers)}\n"
-        mission_context += (
-            "\nUse the mission context to inform your analysis. "
-            "Focus assessment on the target classes and domain specified."
-        )
-    role = _DOMAIN_ROLES.get(domain, _DOMAIN_ROLES["GENERIC"])
-    return (
-        f"You are an elite {role}. "
-        "Your task is to analyze optical surveillance imagery and provide a detailed tactical assessment for every detected object. "
-        f"You must output a STRICT JSON object that matches the following schema for every object ID provided:\n\n"
-        f"{_UNIVERSAL_SCHEMA}\n"
-        "RULES:\n"
-        "- Use dynamic_features for domain-specific observations (e.g., wake_description, deck_activity, sensor_profile, camouflage, license_plate).\n"
-        "- Provide up to 5 dynamic_features per object. Choose the most tactically relevant observations.\n"
-        "- range_estimate should be a human-readable string with units (e.g., '~500m', '~2NM').\n"
-        "- Visible trained weapons are IMMINENT threat (Score 9-10).\n"
-        "- Ignore artifacts, focus on the objects."
-        + mission_context
-    )
-def estimate_threat_gpt(
-    image_path: Optional[str] = None,
-    detections: Optional[List[Dict[str, Any]]] = None,
-    mission_spec=None,  # Optional[MissionSpecification]
-    image_b64: Optional[str] = None,
-) -> Dict[str, Any]:
-    """
-    Perform Threat Assessment on detected objects using GPT-4o.
-    Args:
-        image_path: Path to the image file (mutually exclusive with image_b64).
-        detections: List of detection dicts (bbox, label, etc.).
-        mission_spec: Optional MissionSpecification for domain-aware assessment.
-        image_b64: Pre-encoded base64 JPEG string (avoids disk round-trip).
-    Returns:
-        Dict mapping object ID (e.g., T01) to threat assessment dict.
-    """
-    if detections is None:
-        detections = []
-    if not get_api_key():
-        logger.error("OPENAI_API_KEY not set. Skipping GPT threat assessment.")
-        return {}
-    # 1. Prepare detections summary for prompt.
-    # Human/person classes are explicitly skipped to avoid refusal paths.
-    prompt_items = []
-    skipped_human_ids: List[str] = []
-    for i, det in enumerate(detections):
-        obj_id = str(det.get("track_id") or det.get("id") or f"T{str(i+1).zfill(2)}")
-        bbox = det.get("bbox", [])
-        label = str(det.get("label", "object"))
-        if _is_human_label(label):
-            skipped_human_ids.append(obj_id)
-            continue
-        prompt_items.append({"obj_id": obj_id, "label": label, "bbox": bbox})
-    det_text = "\n".join(
-        [
-            f"- ID: {it['obj_id']}, Classification Hint: {it['label']}, BBox: {it['bbox']}"
-            for it in prompt_items
-        ]
-    )
-    if not det_text:
-        if skipped_human_ids:
-            logger.warning(
-                "Skipping GPT threat assessment for %d human/person detections due policy constraints.",
-                len(skipped_human_ids),
-            )
-            return _build_status_fallback(
-                skipped_human_ids,
-                AssessmentStatus.SKIPPED_POLICY,
-                "Human/person analysis skipped due policy constraints.",
-            )
-        return {}
-    # 2. Encode image (prefer pre-encoded b64 to avoid disk I/O)
-    if image_b64:
-        base64_image = image_b64
-    elif image_path:
-        try:
-            base64_image = encode_image(image_path)
-        except Exception as e:
-            logger.error(f"Failed to encode image for GPT: {e}")
-            return {}
-    else:
-        logger.error("estimate_threat_gpt: no image_path or image_b64 provided")
-        return {}
-    # 3. Domain-aware prompt selection (INV-7)
-    domain = "GENERIC"  # default — universal schema works for all domains
-    if mission_spec:
-        domain = mission_spec.domain
-        if mission_spec.domain_source == "INFERRED":
-            logger.info("GPT assessment using inferred domain=%s (domain_inferred=True)", domain)
-    system_prompt = _build_domain_system_prompt(domain, mission_spec)
-    domain_label = domain.lower() if domain != "NAVAL" else "naval"
-    user_prompt = (
-        f"Analyze this {domain_label} surveillance image. The following objects have been detected:\n"
-        f"{det_text}\n\n"
-        f"Provide a detailed Threat Assessment for each object based on its visual signatures."
-    )
-    # 4. Call API
-    payload = {
-        "model": "gpt-4o", # Use 4o for better vision analysis
-        "messages": [
-            {
-                "role": "system",
-                "content": system_prompt
-            },
-            {
-                "role": "user",
-                "content": [
-                    {
-                        "type": "text",
-                        "text": user_prompt
-                    },
-                    {
-                        "type": "image_url",
-                        "image_url": {
-                            "url": f"data:image/jpeg;base64,{base64_image}",
-                            "detail": "low"
-                        }
-                    }
-                ]
-            }
-        ],
-        "max_tokens": 1500,
-        "temperature": 0.2, # Low temp for factual consistency
-        "response_format": { "type": "json_object" }
-    }
-    try:
-        resp_data = chat_completion(payload)
-        content, refusal = extract_content(resp_data)
-        if not content:
-            if refusal:
-                logger.warning("GPT refused threat assessment: %s", refusal)
-            else:
-                logger.warning(
-                    "GPT returned empty content. response_id=%s finish_reason=%s",
-                    resp_data.get("id"),
-                    resp_data.get("choices", [{}])[0].get("finish_reason"),
-                )
-            fallback = _build_status_fallback(
-                [it["obj_id"] for it in prompt_items],
-                AssessmentStatus.REFUSED,
-                refusal or "GPT returned empty content.",
-            )
-            fallback.update(
-                _build_status_fallback(
-                    skipped_human_ids,
-                    AssessmentStatus.SKIPPED_POLICY,
-                    "Human/person analysis skipped due policy constraints.",
-                )
-            )
-            return fallback
-        result_json = json.loads(content)
-        objects = result_json.get("objects", {})
-        if not isinstance(objects, dict):
-            logger.warning(
-                "GPT response 'objects' field is not a dict (got %s); using fallback.",
-                type(objects).__name__,
-            )
-            objects = {}
-        # Ensure every requested object receives an explicit assessment state.
-        for it in prompt_items:
-            oid = it["obj_id"]
-            if oid not in objects:
-                objects[oid] = {
-                    "assessment_status": AssessmentStatus.NO_RESPONSE,
-                    "gpt_reason": "No structured assessment returned for object.",
-                }
-        for oid in skipped_human_ids:
-            objects.setdefault(
-                oid,
-                {
-                    "assessment_status": AssessmentStatus.SKIPPED_POLICY,
-                    "gpt_reason": "Human/person analysis skipped due policy constraints.",
-                },
-            )
-        # Polyfill legacy fields for frontend compatibility
-        for obj_id, data in objects.items():
-            if not isinstance(data, dict):
-                data = {
-                    "assessment_status": AssessmentStatus.NO_RESPONSE,
-                    "gpt_reason": "Malformed object payload from GPT.",
-                }
-                objects[obj_id] = data
-            # 1. Distance: parse free-text range_estimate to meters
-            range_m = _parse_range_to_meters(data.get("range_estimate", ""))
-            if range_m is not None:
-                data["distance_m"] = range_m
-                data["gpt_distance_m"] = range_m
-            # 2. Direction (legacy alias)
-            bearing = data.get("bearing", "")
-            if bearing and bearing != "Unknown":
-                data["direction"] = bearing
-                data["gpt_direction"] = bearing
-            # 3. Description (summary of new fields)
-            obj_type = data.get("object_type", "Unknown")
-            threat = data.get("threat_classification", "Unknown")
-            score = data.get("threat_level", 0)
-            desc_parts = [obj_type]
-            desc_parts.append(f"[{threat.upper()} Lvl:{score}]")
-            data["description"] = " ".join(desc_parts)
-            data["gpt_description"] = data["description"]
-            # 4. Legacy threat_level_score alias
-            data["threat_level_score"] = data.get("threat_level", 0)
-        return objects
-    except Exception as e:
-        logger.error("GPT API call failed: %s", e, exc_info=True)
-        fallback = _build_status_fallback(
-            [it["obj_id"] for it in prompt_items],
-            AssessmentStatus.ERROR,
-            f"GPT API call failed: {e.__class__.__name__}",
-        )
-        fallback.update(
-            _build_status_fallback(
-                skipped_human_ids,
-                AssessmentStatus.SKIPPED_POLICY,
-                "Human/person analysis skipped due policy constraints.",
-            )
-        )
-        return fallback

utils/mission_parser.py DELETED Viewed

@@ -1,481 +0,0 @@
-"""
-Mission text parser — converts raw operator text into a validated MissionSpecification.
-Single public function: parse_mission_text(raw_text, detector_key) -> MissionSpecification
-Internal flow:
-1. Fast-path regex check -> skip LLM if comma-separated labels
-2. LLM extraction call (GPT-4o, temperature 0.0)
-3. Deterministic validation pipeline
-4. COCO vocabulary mapping for COCO-only detectors
-5. Build RelevanceCriteria deterministically from mapped classes
-6. Return validated MissionSpecification or raise MissionParseError
-"""
-import json
-import logging
-import re
-from typing import List, Optional
-from utils.openai_client import chat_completion, extract_content, get_api_key, OpenAIAPIError
-from coco_classes import COCO_CLASSES, canonicalize_coco_name, coco_class_catalog
-from utils.schemas import MissionSpecification, RelevanceCriteria
-logger = logging.getLogger(__name__)
-# Detectors that only support COCO class vocabulary
-_COCO_ONLY_DETECTORS = frozenset({"yolo11", "detr_resnet50"})
-class MissionParseError(ValueError):
-    """Raised when mission text cannot be parsed into a valid MissionSpecification."""
-    def __init__(self, message: str, warnings: Optional[List[str]] = None):
-        self.warnings = warnings or []
-        super().__init__(message)
-def _is_comma_separated_labels(text: str) -> bool:
-    """Fast-path: detect simple comma-separated class labels (no LLM needed)."""
-    # Match: word tokens separated by commas, each token <= 3 words
-    pattern = r"^[\w\s]+(,\s*[\w\s]+)*$"
-    if not re.match(pattern, text.strip()):
-        return False
-    tokens = [t.strip() for t in text.split(",") if t.strip()]
-    return all(len(t.split()) <= 3 for t in tokens)
-def _is_coco_only(detector_key: str) -> bool:
-    return detector_key in _COCO_ONLY_DETECTORS
-def _map_coco_classes(
-    object_classes: List[str], detector_key: str
-) -> tuple[List[str], List[str], List[str]]:
-    """Map object classes to COCO vocabulary for COCO-only detectors.
-    Returns:
-        (mapped_classes, unmappable_classes, warnings)
-    """
-    if not _is_coco_only(detector_key):
-        return object_classes, [], []
-    mapped = []
-    unmappable = []
-    warnings = []
-    seen = set()
-    for cls in object_classes:
-        canonical = canonicalize_coco_name(cls)
-        if canonical is not None:
-            if canonical not in seen:
-                mapped.append(canonical)
-                seen.add(canonical)
-            if canonical.lower() != cls.lower():
-                warnings.append(
-                    f"'{cls}' mapped to COCO class '{canonical}'."
-                )
-        else:
-            unmappable.append(cls)
-            warnings.append(
-                f"'{cls}' is not in COCO vocabulary. Will not be detected by {detector_key}."
-            )
-    return mapped, unmappable, warnings
-def _build_fast_path_spec(
-    raw_text: str, object_classes: List[str], detector_key: str
-) -> MissionSpecification:
-    """Build MissionSpecification for simple comma-separated input (no LLM call)."""
-    mapped, unmappable, warnings = _map_coco_classes(object_classes, detector_key)
-    if _is_coco_only(detector_key) and not mapped:
-        raise MissionParseError(
-            f"None of the requested objects ({', '.join(object_classes)}) match the "
-            f"{detector_key} vocabulary. This detector supports: "
-            f"{coco_class_catalog()}. "
-            f"Use an open-vocabulary detector (Grounding DINO) or adjust your mission.",
-            warnings=warnings,
-        )
-    final_classes = mapped if _is_coco_only(detector_key) else object_classes
-    return MissionSpecification(
-        object_classes=final_classes,
-        mission_intent="DETECT",
-        domain="GENERIC",
-        domain_source="INFERRED",
-        relevance_criteria=RelevanceCriteria(
-            required_classes=final_classes,
-            min_confidence=0.0,
-        ),
-        context_phrases=[],
-        stripped_modifiers=[],
-        operator_text=raw_text,
-        parse_mode="FAST_PATH",
-        parse_confidence="HIGH",
-        parse_warnings=warnings,
-    )
-# --- LLM Extraction ---
-_SYSTEM_PROMPT = (
-    "You are a mission text parser for an object detection system. Your ONLY job is to extract "
-    "structured fields from operator mission text. You do NOT assess threats. You do NOT reason "
-    "about tactics. You extract and classify.\n\n"
-    "OUTPUT SCHEMA (strict JSON):\n"
-    "{\n"
-    '  "object_classes": ["string"],\n'
-    '  "mission_intent": "ENUM",\n'
-    '  "domain": "ENUM",\n'
-    '  "context_phrases": ["string"],\n'
-    '  "stripped_modifiers": ["string"],\n'
-    '  "parse_confidence": "ENUM",\n'
-    '  "parse_warnings": ["string"]\n'
-    "}\n\n"
-    "EXTRACTION RULES:\n\n"
-    "1. OBJECT_CLASSES — What to extract:\n"
-    "   - Extract nouns and noun phrases that refer to PHYSICAL, VISUALLY DETECTABLE objects.\n"
-    "   - Keep visual descriptors that narrow the category: 'small boat', 'military vehicle', 'cargo ship'.\n"
-    "   - Use singular form: 'vessels' -> 'vessel', 'people' -> 'person'.\n"
-    "   - If the input is already comma-separated class labels (e.g., 'person, car, boat'),\n"
-    "     use them directly without modification.\n\n"
-    "2. OBJECT_CLASSES — What to strip:\n"
-    "   - Remove threat/intent adjectives: 'hostile', 'suspicious', 'friendly', 'dangerous', 'enemy'.\n"
-    "     -> Move these to stripped_modifiers.\n"
-    "   - Remove action verbs: 'approaching', 'fleeing', 'attacking'.\n"
-    "     -> Move the full phrase to context_phrases.\n"
-    "   - Remove spatial/temporal phrases: 'from the east', 'near the harbor', 'at night'.\n"
-    "     -> Move to context_phrases.\n"
-    "   - Do NOT extract abstract concepts: 'threat', 'danger', 'hazard', 'risk' are not objects.\n\n"
-    "3. MISSION_INTENT — Infer from verbs:\n"
-    "   - 'detect', 'find', 'locate', 'spot', 'search for' -> DETECT\n"
-    "   - 'classify', 'identify', 'determine type of' -> CLASSIFY\n"
-    "   - 'track', 'follow', 'monitor movement of' -> TRACK\n"
-    "   - 'assess threat', 'evaluate danger', 'threat assessment' -> ASSESS_THREAT\n"
-    "   - 'monitor', 'watch', 'observe', 'surveil' -> MONITOR\n"
-    "   - If no verb present (bare class list), default to DETECT.\n\n"
-    "4. DOMAIN — Infer from contextual clues:\n"
-    "   - Maritime vocabulary (vessel, ship, boat, harbor, naval, maritime, wake, sea) -> NAVAL\n"
-    "   - Ground vocabulary (vehicle, convoy, checkpoint, road, building, infantry) -> GROUND\n"
-    "   - Aerial vocabulary (aircraft, drone, UAV, airspace, altitude, flight) -> AERIAL\n"
-    "   - Urban vocabulary (pedestrian, intersection, storefront, crowd, building) -> URBAN\n"
-    "   - If no domain clues present -> GENERIC\n\n"
-    "5. PARSE_CONFIDENCE:\n"
-    "   - HIGH: Clear object classes extracted, domain identifiable.\n"
-    "   - MEDIUM: Some ambiguity but reasonable extraction possible. Include warnings.\n"
-    "   - LOW: Cannot extract meaningful object classes. Input is too abstract,\n"
-    "     contradictory, or contains no visual object references.\n"
-    "     Examples of LOW: 'keep us safe', 'do your job', 'analyze everything'.\n\n"
-    "FORBIDDEN:\n"
-    "- Do NOT infer object classes not implied by the text. If the text says 'boats',\n"
-    "  do not add 'person' or 'vehicle' unless mentioned.\n"
-    "- Do NOT add threat scores, engagement rules, or tactical recommendations.\n"
-    "- Do NOT interpret what 'threat' or 'danger' means in terms of specific objects.\n"
-    "  If the operator writes 'detect threats', set parse_confidence to LOW and warn:\n"
-    "  \"'threats' is not a visual object class. Specify what objects to detect.\""
-)
-_VISION_GROUNDING_ADDENDUM = (
-    "\n\nVISION GROUNDING (when an image is provided):\n"
-    "You may receive the first frame of the operator's video feed as an image.\n"
-    "Use it to REFINE your object_classes extraction:\n\n"
-    "1. If the operator uses a general term (e.g., 'vessels', 'vehicles'),\n"
-    "   inspect the image and add MORE SPECIFIC subcategories visible in the scene.\n"
-    "   Example: operator says 'detect vessels', image shows a speedboat and a cargo ship\n"
-    "   -> object_classes: ['vessel', 'speedboat', 'cargo ship']\n\n"
-    "2. If the operator mentions objects NOT visible in the first frame,\n"
-    "   still include them (later frames may contain them), but add a\n"
-    "   parse_warning noting they were not visible in the first frame.\n\n"
-    "3. Use the image to CONFIRM or REFINE the domain. If the text is ambiguous\n"
-    "   but the image clearly shows open water, set domain to NAVAL.\n\n"
-    "4. Do NOT hallucinate objects. Only add specific subcategories if clearly\n"
-    "   identifiable. When uncertain, keep the general term.\n\n"
-    "5. The same OUTPUT SCHEMA and all EXTRACTION RULES still apply.\n"
-    "   The image is supplementary context, not a replacement for the text.\n"
-)
-def _extract_and_encode_first_frame(video_path: Optional[str]) -> Optional[str]:
-    """Extract the first frame from a video and return it as a base64-encoded JPEG.
-    Never raises — returns None on any failure so the caller can fall back
-    to text-only parsing.
-    """
-    if not video_path:
-        return None
-    try:
-        from inference import extract_first_frame
-        from utils.gpt_reasoning import encode_frame_to_b64
-        frame, _fps, _w, _h = extract_first_frame(video_path)
-        return encode_frame_to_b64(frame, quality=85)
-    except Exception:
-        logger.warning("Failed to extract/encode first frame for vision grounding", exc_info=True)
-        return None
-def _call_extraction_llm(raw_text: str, detector_key: str, first_frame_b64: Optional[str] = None) -> dict:
-    """Call GPT-4o to extract structured mission fields from natural language."""
-    if not get_api_key():
-        raise MissionParseError(
-            "OPENAI_API_KEY not set. Cannot parse natural language mission text. "
-            "Use comma-separated class labels instead (e.g., 'person, car, boat')."
-        )
-    detector_type = "COCO_ONLY" if _is_coco_only(detector_key) else "OPEN_VOCAB"
-    user_prompt_text = (
-        f'OPERATOR MISSION TEXT:\n"{raw_text}"\n\n'
-        f"DETECTOR TYPE: {detector_type}\n\n"
-        "Extract the structured mission specification from the above text."
-    )
-    # Build system prompt (append vision addendum when image is available)
-    system_content = _SYSTEM_PROMPT
-    if first_frame_b64:
-        system_content = _SYSTEM_PROMPT + _VISION_GROUNDING_ADDENDUM
-    # Build user message: mixed content array when image is available, plain string otherwise
-    if first_frame_b64:
-        user_message = {
-            "role": "user",
-            "content": [
-                {"type": "text", "text": user_prompt_text},
-                {
-                    "type": "image_url",
-                    "image_url": {
-                        "url": f"data:image/jpeg;base64,{first_frame_b64}",
-                        "detail": "low",
-                    },
-                },
-            ],
-        }
-    else:
-        user_message = {"role": "user", "content": user_prompt_text}
-    max_tokens = 700 if first_frame_b64 else 500
-    timeout_s = 45 if first_frame_b64 else 30
-    payload = {
-        "model": "gpt-4o",
-        "temperature": 0.0,
-        "max_tokens": max_tokens,
-        "response_format": {"type": "json_object"},
-        "messages": [
-            {"role": "system", "content": system_content},
-            user_message,
-        ],
-    }
-    try:
-        resp_data = chat_completion(payload, timeout=timeout_s)
-        content, _refusal = extract_content(resp_data)
-        if not content:
-            raise MissionParseError("GPT returned empty content during mission parsing.")
-        return json.loads(content)
-    except OpenAIAPIError as e:
-        raise MissionParseError(f"Mission parsing API call failed: {e}")
-    except json.JSONDecodeError:
-        raise MissionParseError(
-            "GPT returned invalid JSON. Please rephrase your mission."
-        )
-def _validate_and_build(
-    llm_output: dict, raw_text: str, detector_key: str
-) -> MissionSpecification:
-    """Deterministic validation pipeline (Section 7.3 decision tree)."""
-    # Step 2: Extract fields with defaults
-    object_classes = llm_output.get("object_classes", [])
-    mission_intent = llm_output.get("mission_intent", "DETECT")
-    domain = llm_output.get("domain", "GENERIC")
-    context_phrases = llm_output.get("context_phrases", [])
-    stripped_modifiers = llm_output.get("stripped_modifiers", [])
-    parse_confidence = llm_output.get("parse_confidence", "LOW")
-    parse_warnings = llm_output.get("parse_warnings", [])
-    # Validate enum values
-    valid_intents = {"DETECT", "CLASSIFY", "TRACK", "ASSESS_THREAT", "MONITOR"}
-    if mission_intent not in valid_intents:
-        mission_intent = "DETECT"
-        parse_warnings.append(f"Invalid mission_intent '{llm_output.get('mission_intent')}', defaulted to DETECT.")
-    valid_domains = {"NAVAL", "GROUND", "AERIAL", "URBAN", "GENERIC"}
-    if domain not in valid_domains:
-        domain = "GENERIC"
-        parse_warnings.append(f"Invalid domain '{llm_output.get('domain')}', defaulted to GENERIC.")
-    valid_confidence = {"HIGH", "MEDIUM", "LOW"}
-    if parse_confidence not in valid_confidence:
-        parse_confidence = "LOW"
-    # Step 3: Parse confidence check
-    if parse_confidence == "LOW":
-        warnings_str = "; ".join(parse_warnings) if parse_warnings else "No details"
-        raise MissionParseError(
-            f"Could not extract object classes from mission text. "
-            f"Warnings: {warnings_str}. "
-            f"Please specify concrete objects to detect (e.g., 'vessel, small boat').",
-            warnings=parse_warnings,
-        )
-    # Validate object_classes is non-empty
-    if not object_classes:
-        raise MissionParseError(
-            "Mission text produced no detectable object classes. "
-            "Please specify concrete objects to detect.",
-            warnings=parse_warnings,
-        )
-    # Filter out empty strings
-    object_classes = [c.strip() for c in object_classes if c and c.strip()]
-    if not object_classes:
-        raise MissionParseError(
-            "All extracted object classes were empty after cleanup.",
-            warnings=parse_warnings,
-        )
-    # Step 4: COCO vocabulary mapping
-    mapped, unmappable, coco_warnings = _map_coco_classes(object_classes, detector_key)
-    parse_warnings.extend(coco_warnings)
-    if _is_coco_only(detector_key):
-        if not mapped:
-            raise MissionParseError(
-                f"None of the requested objects ({', '.join(object_classes)}) match the "
-                f"{detector_key} vocabulary. "
-                f"This detector supports: {coco_class_catalog()}. "
-                f"Use an open-vocabulary detector (Grounding DINO) or adjust your mission.",
-                warnings=parse_warnings,
-            )
-        final_classes = mapped
-    else:
-        final_classes = object_classes
-    # Step 5: Build RelevanceCriteria deterministically
-    relevance_criteria = RelevanceCriteria(
-        required_classes=final_classes,
-        min_confidence=0.0,
-    )
-    # Step 6: Construct MissionSpecification
-    return MissionSpecification(
-        object_classes=final_classes,
-        mission_intent=mission_intent,
-        domain=domain,
-        domain_source="INFERRED",
-        relevance_criteria=relevance_criteria,
-        # INVARIANT INV-13: context_phrases are forwarded to LLM reasoning layers
-        # (GPT threat assessment, threat chat) as situational context ONLY.
-        # They must NEVER be used in evaluate_relevance(), prioritization,
-        # or any deterministic filtering/sorting logic.
-        context_phrases=context_phrases,
-        stripped_modifiers=stripped_modifiers,
-        operator_text=raw_text,
-        parse_mode="LLM_EXTRACTED",
-        parse_confidence=parse_confidence,
-        parse_warnings=parse_warnings,
-    )
-_DOMAIN_BROAD_CATEGORIES: dict[str, List[str]] = {
-    "NAVAL": ["vessel", "ship", "boat", "buoy", "person"],
-    "AERIAL": ["aircraft", "helicopter", "drone", "airplane"],
-    "GROUND": ["vehicle", "car", "truck", "person", "building"],
-    "URBAN": ["person", "vehicle", "car", "bicycle"],
-    "GENERIC": ["object"],
-}
-def build_broad_queries(
-    detector_key: str, mission_spec: MissionSpecification
-) -> List[str]:
-    """Build broad detector queries for LLM post-filter mode.
-    For FAST_PATH: return object_classes directly (unchanged behavior).
-    For COCO detectors (LLM_EXTRACTED): return ALL 80 COCO classes.
-    For open-vocab detectors (LLM_EXTRACTED): return LLM-extracted classes
-    PLUS broad domain categories to maximize recall.
-    """
-    if mission_spec.parse_mode == "FAST_PATH":
-        return mission_spec.object_classes
-    # LLM_EXTRACTED path: detect broadly
-    if _is_coco_only(detector_key):
-        # COCO detectors ignore queries anyway (DETR detects all 80;
-        # YOLO11 falls back to all if no matches). Send everything.
-        return list(COCO_CLASSES)
-    # Open-vocab detector (e.g. Grounding DINO):
-    # Combine LLM-extracted classes with domain-specific broad categories
-    broad = list(mission_spec.object_classes)
-    domain_extras = _DOMAIN_BROAD_CATEGORIES.get(
-        mission_spec.domain, _DOMAIN_BROAD_CATEGORIES["GENERIC"]
-    )
-    seen = {c.lower() for c in broad}
-    for cat in domain_extras:
-        if cat.lower() not in seen:
-            broad.append(cat)
-            seen.add(cat.lower())
-    logger.info("Broad queries for %s: %s", detector_key, broad)
-    return broad
-def parse_mission_text(
-    raw_text: str,
-    detector_key: str,
-    video_path: Optional[str] = None,
-) -> MissionSpecification:
-    """Parse raw mission text into a validated MissionSpecification.
-    Args:
-        raw_text: Verbatim mission text from the operator.
-        detector_key: Detector model key (determines COCO vocabulary constraints).
-        video_path: Optional path to input video; first frame used for vision grounding.
-    Returns:
-        Validated MissionSpecification.
-    Raises:
-        MissionParseError: If mission text cannot produce a valid specification.
-    """
-    if not raw_text or not raw_text.strip():
-        raise MissionParseError(
-            "Mission text is empty. Specify objects to detect or use the default queries."
-        )
-    raw_text = raw_text.strip()
-    # Fast-path: simple comma-separated labels -> skip LLM
-    if _is_comma_separated_labels(raw_text):
-        object_classes = [t.strip() for t in raw_text.split(",") if t.strip()]
-        logger.info(
-            "Mission fast-path: comma-separated labels %s", object_classes
-        )
-        return _build_fast_path_spec(raw_text, object_classes, detector_key)
-    # LLM path: natural language mission text
-    logger.info("Mission LLM-path: extracting from natural language")
-    first_frame_b64 = _extract_and_encode_first_frame(video_path)
-    if first_frame_b64:
-        logger.info("Vision grounding: first frame encoded for LLM call")
-    llm_output = _call_extraction_llm(raw_text, detector_key, first_frame_b64=first_frame_b64)
-    logger.info("Mission LLM extraction result: %s", llm_output)
-    mission_spec = _validate_and_build(llm_output, raw_text, detector_key)
-    logger.info(
-        "Mission parsed: classes=%s intent=%s domain=%s(%s) confidence=%s",
-        mission_spec.object_classes,
-        mission_spec.mission_intent,
-        mission_spec.domain,
-        mission_spec.domain_source,
-        mission_spec.parse_confidence,
-    )
-    return mission_spec

utils/openai_client.py DELETED Viewed

@@ -1,80 +0,0 @@
-"""
-Shared OpenAI HTTP client — single implementation of the chat-completions call.
-Replaces duplicated urllib boilerplate in gpt_reasoning, relevance,
-mission_parser, and threat_chat.
-"""
-import json
-import logging
-import os
-import urllib.request
-import urllib.error
-from typing import Dict, Optional, Tuple
-logger = logging.getLogger(__name__)
-_API_URL = "https://api.openai.com/v1/chat/completions"
-class OpenAIAPIError(Exception):
-    """Raised when the OpenAI API call fails (HTTP or network error)."""
-    def __init__(self, message: str, status_code: Optional[int] = None):
-        self.status_code = status_code
-        super().__init__(message)
-def get_api_key() -> Optional[str]:
-    """Return the OpenAI API key from the environment, or None."""
-    return os.environ.get("OPENAI_API_KEY")
-def chat_completion(payload: Dict, *, timeout: int = 30) -> Dict:
-    """Send a chat-completion request and return the parsed JSON response.
-    Args:
-        payload: Full request body (model, messages, etc.).
-        timeout: HTTP timeout in seconds.
-    Returns:
-        Parsed response dict.
-    Raises:
-        OpenAIAPIError: On HTTP or network failure.
-    """
-    api_key = get_api_key()
-    if not api_key:
-        raise OpenAIAPIError("OPENAI_API_KEY not set")
-    headers = {
-        "Content-Type": "application/json",
-        "Authorization": f"Bearer {api_key}",
-    }
-    try:
-        req = urllib.request.Request(
-            _API_URL,
-            data=json.dumps(payload).encode("utf-8"),
-            headers=headers,
-            method="POST",
-        )
-        with urllib.request.urlopen(req, timeout=timeout) as response:
-            return json.loads(response.read().decode("utf-8"))
-    except urllib.error.HTTPError as e:
-        raise OpenAIAPIError(
-            f"HTTP {e.code}: {e.reason}", status_code=e.code
-        ) from e
-    except urllib.error.URLError as e:
-        raise OpenAIAPIError(f"URL error: {e.reason}") from e
-def extract_content(resp_data: Dict) -> Tuple[Optional[str], Optional[str]]:
-    """Safely extract content and refusal from a chat-completion response.
-    Returns:
-        (content, refusal) — either may be None.
-    """
-    choice = resp_data.get("choices", [{}])[0]
-    message = choice.get("message", {})
-    return message.get("content"), message.get("refusal")

utils/relevance.py DELETED Viewed

@@ -1,141 +0,0 @@
-"""
-Object relevance evaluation — deterministic gate between detection and GPT assessment.
-Public functions:
-  evaluate_relevance(detection, criteria) -> RelevanceDecision  (deterministic)
-  evaluate_relevance_llm(detected_labels, mission_text) -> set[str]  (LLM post-filter)
-INVARIANT INV-13 enforcement: evaluate_relevance() accepts RelevanceCriteria, NOT
-MissionSpecification. It cannot see context_phrases, stripped_modifiers, or any
-LLM-derived field. This is structural, not by convention.
-"""
-import json
-import logging
-from typing import Any, Dict, List, NamedTuple, Set
-from utils.openai_client import chat_completion, extract_content, get_api_key, OpenAIAPIError
-from coco_classes import canonicalize_coco_name
-from utils.schemas import RelevanceCriteria
-logger = logging.getLogger(__name__)
-class RelevanceDecision(NamedTuple):
-    relevant: bool
-    reason: str  # "ok" | "label_not_in_required_classes" | "below_confidence"
-def evaluate_relevance(
-    detection: Dict[str, Any],
-    criteria: RelevanceCriteria,
-) -> RelevanceDecision:
-    """Evaluate whether a detection is relevant to the mission.
-    Pure deterministic predicate — no LLM involvement.
-    Args:
-        detection: Detection dict with at least 'label' and 'score' keys.
-        criteria: RelevanceCriteria with required_classes and min_confidence.
-    Returns:
-        RelevanceDecision(relevant=bool, reason=str).
-    """
-    label = (detection.get("label") or "").lower().strip()
-    confidence = detection.get("score", 0.0)
-    if not label:
-        return RelevanceDecision(False, "label_not_in_required_classes")
-    # Build lowercase set of required classes for comparison
-    required_lower = {c.lower() for c in criteria.required_classes}
-    # Direct match
-    if label in required_lower:
-        if confidence < criteria.min_confidence:
-            return RelevanceDecision(False, "below_confidence")
-        return RelevanceDecision(True, "ok")
-    # Synonym match via COCO canonicalization
-    canonical = canonicalize_coco_name(label)
-    if canonical and canonical.lower() in required_lower:
-        if confidence < criteria.min_confidence:
-            return RelevanceDecision(False, "below_confidence")
-        return RelevanceDecision(True, "ok")
-    # Check if any required class canonicalizes to the same COCO class as the label
-    if canonical:
-        for req in criteria.required_classes:
-            req_canonical = canonicalize_coco_name(req)
-            if req_canonical and req_canonical.lower() == canonical.lower():
-                if confidence < criteria.min_confidence:
-                    return RelevanceDecision(False, "below_confidence")
-                return RelevanceDecision(True, "ok")
-    return RelevanceDecision(False, "label_not_in_required_classes")
-def evaluate_relevance_llm(
-    detected_labels: List[str],
-    mission_text: str,
-) -> Set[str]:
-    """Ask GPT which detected labels are relevant to the mission.
-    Called ONCE on frame 0 with the unique labels found by the detector.
-    Returns a set of relevant label strings (lowercased).
-    On API failure, falls back to accepting all labels (fail-open, logged).
-    """
-    if not detected_labels:
-        return set()
-    if not get_api_key():
-        logger.warning(
-            "OPENAI_API_KEY not set — LLM relevance filter falling back to accept-all"
-        )
-        return set(detected_labels)
-    prompt = (
-        f"Given this mission: \"{mission_text}\"\n\n"
-        f"Which of these detected object classes are relevant to the mission?\n"
-        f"{json.dumps(detected_labels)}\n\n"
-        "Return JSON: {\"relevant_labels\": [...]}\n"
-        "Only include labels from the provided list that are relevant to "
-        "accomplishing the mission. Be inclusive — if in doubt, include it."
-    )
-    payload = {
-        "model": "gpt-4o-mini",
-        "temperature": 0.0,
-        "max_tokens": 200,
-        "response_format": {"type": "json_object"},
-        "messages": [
-            {"role": "system", "content": "You are a mission relevance filter. Return only JSON."},
-            {"role": "user", "content": prompt},
-        ],
-    }
-    try:
-        resp_data = chat_completion(payload)
-        content, _refusal = extract_content(resp_data)
-        if not content:
-            logger.warning("GPT returned empty content for relevance filter — accept-all")
-            return set(detected_labels)
-        result = json.loads(content)
-        relevant = result.get("relevant_labels", detected_labels)
-        relevant_set = {label.lower() for label in relevant}
-        logger.info(
-            "LLM relevance filter: mission=%r detected=%s relevant=%s",
-            mission_text, detected_labels, relevant_set,
-        )
-        return relevant_set
-    except OpenAIAPIError as e:
-        logger.warning("LLM relevance API call failed: %s — accept-all fallback", e)
-        return set(detected_labels)
-    except (json.JSONDecodeError, KeyError, TypeError) as e:
-        logger.warning("LLM relevance response parse failed: %s — accept-all fallback", e)
-        return set(detected_labels)

utils/schemas.py DELETED Viewed

@@ -1,115 +0,0 @@
-from pydantic import BaseModel, Field
-from typing import List, Literal
-# --- Mission-Driven Abstractions ---
-class RelevanceCriteria(BaseModel):
-    """Deterministic boolean predicate for filtering detections against a mission.
-    This is the ONLY input to evaluate_relevance(). It intentionally excludes
-    context_phrases, stripped_modifiers, and all LLM-derived context so that
-    relevance filtering remains purely deterministic (INV-13).
-    """
-    required_classes: List[str] = Field(
-        ..., min_length=1,
-        description="Object categories that satisfy the mission. "
-                    "Detections whose label is not in this list are excluded."
-    )
-    min_confidence: float = Field(
-        default=0.0, ge=0.0, le=1.0,
-        description="Minimum detector confidence to consider a detection relevant."
-    )
-class MissionSpecification(BaseModel):
-    """Structured representation of operator intent.
-    Created once from raw mission text at the API boundary (app.py).
-    Forwarded to: detector (object_classes), GPT (full spec), chat (full spec),
-    relevance gate (relevance_criteria only — INV-13).
-    INVARIANT INV-13: context_phrases are forwarded to LLM reasoning layers
-    (GPT threat assessment, threat chat) as situational context ONLY.
-    They must NEVER be used in evaluate_relevance(), prioritization,
-    or any deterministic filtering/sorting logic.
-    """
-    # --- Extracted by LLM or fast-path ---
-    # Required; must contain at least one class.
-    object_classes: List[str] = Field(
-        ..., min_length=1,
-        description="Concrete, visually detectable object categories to detect. "
-                    "These become detector queries. Must be nouns, not adjectives or verbs."
-    )
-    # Required; restricted to the five listed intent values.
-    mission_intent: Literal[
-        "DETECT", "CLASSIFY", "TRACK", "ASSESS_THREAT", "MONITOR"
-    ] = Field(
-        ...,
-        description="Operator purpose. DETECT=find objects, CLASSIFY=identify type, "
-                    "TRACK=follow over time, ASSESS_THREAT=evaluate danger, MONITOR=passive watch."
-    )
-    # Required; restricted to the five listed domain values.
-    domain: Literal[
-        "NAVAL", "GROUND", "AERIAL", "URBAN", "GENERIC"
-    ] = Field(
-        ...,
-        description="Operational domain. Selects the GPT assessment schema and system prompt."
-    )
-    # Optional; defaults to "INFERRED".
-    domain_source: Literal["INFERRED", "OPERATOR_SET"] = Field(
-        default="INFERRED",
-        description="Whether domain was LLM-inferred or explicitly set by operator."
-    )
-    # --- Deterministic (derived from object_classes) ---
-    # Required nested model; not defaulted here, so the constructor's caller
-    # must supply it.
-    relevance_criteria: RelevanceCriteria = Field(
-        ...,
-        description="Boolean predicate for filtering detections. "
-                    "Built deterministically from object_classes after extraction."
-    )
-    # --- Context preservation ---
-    # Optional; each instance gets its own empty list by default.
-    context_phrases: List[str] = Field(
-        default_factory=list,
-        description="Non-class contextual phrases from mission text. "
-                    "E.g., 'approaching from the east', 'near the harbor'. "
-                    "Forwarded to GPT as situational context, NOT used for detection."
-    )
-    # Optional; each instance gets its own empty list by default.
-    stripped_modifiers: List[str] = Field(
-        default_factory=list,
-        description="Adjectives/modifiers removed during extraction. "
-                    "E.g., 'hostile', 'suspicious', 'friendly'. Logged for audit."
-    )
-    # Required.
-    operator_text: str = Field(
-        ...,
-        description="Original unmodified mission text from the operator. Preserved for audit."
-    )
-    # --- Parse mode ---
-    # Optional; defaults to "FAST_PATH".
-    parse_mode: Literal["FAST_PATH", "LLM_EXTRACTED"] = Field(
-        default="FAST_PATH",
-        description="How this spec was created. FAST_PATH = comma-separated labels, "
-                    "LLM_EXTRACTED = natural language parsed by GPT."
-    )
-    # --- LLM self-assessment ---
-    # Required; restricted to HIGH / MEDIUM / LOW.
-    parse_confidence: Literal["HIGH", "MEDIUM", "LOW"] = Field(
-        ...,
-        description="Confidence in the extraction. "
-                    "LOW = could not reliably extract classes -> triggers rejection."
-    )
-    # Optional; each instance gets its own empty list by default.
-    parse_warnings: List[str] = Field(
-        default_factory=list,
-        description="Specific issues encountered during extraction. "
-                    "E.g., 'term \"threat\" is not a visual class, stripped'."
-    )
-class AssessmentStatus:
-    """Canonical string constants for detection assessment lifecycle."""
-    ASSESSED = "ASSESSED"
-    UNASSESSED = "UNASSESSED"
-    PENDING_GPT = "PENDING_GPT"
-    SKIPPED_POLICY = "SKIPPED_POLICY"
-    REFUSED = "REFUSED"
-    ERROR = "ERROR"
-    NO_RESPONSE = "NO_RESPONSE"
-    STALE = "STALE"

utils/threat_chat.py DELETED Viewed

@@ -1,154 +0,0 @@
-"""
-Threat Chat Module - GPT-powered Q&A about detected threats.
-"""
-import logging
-from typing import List, Dict, Any
-from utils.openai_client import chat_completion, extract_content, get_api_key, OpenAIAPIError
-from utils.gpt_reasoning import _DOMAIN_ROLES
-logger = logging.getLogger(__name__)
-def chat_about_threats(
-    question: str,
-    detections: List[Dict[str, Any]],
-    mission_spec_dict: Dict[str, Any] = None,
-) -> str:
-    """
-    Answer user questions about detected threats using GPT.
-    Args:
-        question: User's question about the current threat situation.
-        detections: List of detection dicts with gpt_raw threat analysis.
-        mission_spec_dict: Optional dict of mission specification fields.
-    Returns:
-        GPT's response as a string.
-    """
-    if not get_api_key():
-        logger.warning("OPENAI_API_KEY not set. Cannot process threat chat.")
-        return "Error: OpenAI API key not configured."
-    if not detections:
-        return "No threats detected yet. Run detection first to analyze the scene."
-    # Build threat context from detections
-    threat_context = _build_threat_context(detections)
-    # Domain-aware role selection
-    domain = "GENERIC"
-    if mission_spec_dict:
-        domain = mission_spec_dict.get("domain", "GENERIC")
-    role_label = _DOMAIN_ROLES.get(domain, _DOMAIN_ROLES["GENERIC"])
-    # Build mission context block (INV-8: mission context forwarded to LLM calls)
-    mission_block = ""
-    if mission_spec_dict:
-        mission_block = "\nMISSION CONTEXT:\n"
-        if mission_spec_dict.get("mission_intent"):
-            mission_block += f"- Intent: {mission_spec_dict['mission_intent']}\n"
-        if mission_spec_dict.get("domain"):
-            mission_block += f"- Domain: {mission_spec_dict['domain']}\n"
-        if mission_spec_dict.get("object_classes"):
-            mission_block += f"- Target Classes: {', '.join(mission_spec_dict['object_classes'])}\n"
-        if mission_spec_dict.get("context_phrases"):
-            mission_block += f"- Situation: {'; '.join(mission_spec_dict['context_phrases'])}\n"
-        mission_block += "\n"
-    system_prompt = (
-        f"You are a {role_label} providing real-time threat analysis support. "
-        "You have access to the current threat assessment data from optical surveillance. "
-        "Answer questions concisely and tactically. Use military terminology where appropriate. "
-        "If asked about engagement recommendations, always note that final decisions rest with the commanding officer.\n\n"
-        f"{mission_block}"
-        "CURRENT THREAT PICTURE:\n"
-        f"{threat_context}\n\n"
-        "Respond to the operator's question based on this threat data."
-    )
-    payload = {
-        "model": "gpt-4o",
-        "messages": [
-            {"role": "system", "content": system_prompt},
-            {"role": "user", "content": question}
-        ],
-        "max_tokens": 500,
-        "temperature": 0.3,
-    }
-    try:
-        resp_data = chat_completion(payload)
-        content, _refusal = extract_content(resp_data)
-        return content.strip() if content else "No response generated."
-    except OpenAIAPIError as e:
-        logger.error("OpenAI API error: %s", e)
-        return f"API Error: {e}"
-    except Exception as e:
-        logger.error("Threat chat failed: %s", e)
-        return f"Error processing question: {str(e)}"
-def _build_threat_context(detections: List[Dict[str, Any]]) -> str:
-    """Build a text summary of all detected threats for GPT context."""
-    lines = []
-    for det in detections:
-        obj_id = det.get("id", "Unknown")
-        label = det.get("label", "object")
-        # Extract GPT raw data if available
-        gpt_raw = det.get("gpt_raw") or det.get("features") or {}
-        # Universal schema fields (with fallbacks to legacy names)
-        obj_type = gpt_raw.get("object_type") or gpt_raw.get("vessel_category", label)
-        size = gpt_raw.get("size", "")
-        threat_score = (
-            det.get("threat_level_score")
-            or gpt_raw.get("threat_level")
-            or gpt_raw.get("threat_level_score", "?")
-        )
-        threat_class = (
-            det.get("threat_classification")
-            or gpt_raw.get("threat_classification", "Unknown")
-        )
-        weapons = gpt_raw.get("visible_weapons", [])
-        weapon_ready = gpt_raw.get("weapon_readiness") or det.get("weapon_readiness", "Unknown")
-        motion = gpt_raw.get("motion_status", "Unknown")
-        range_est = gpt_raw.get("range_estimate") or gpt_raw.get("range_estimation_nm", "")
-        bearing = gpt_raw.get("bearing") or gpt_raw.get("bearing_clock") or det.get("gpt_direction", "")
-        intent = gpt_raw.get("tactical_intent", "")
-        dynamic_features = gpt_raw.get("dynamic_features", [])
-        # Build entry
-        entry = f"[{obj_id}] {obj_type}"
-        if size and size != "Unknown":
-            entry += f" ({size})"
-        entry += f"\n  - Threat: {threat_class} (Score: {threat_score}/10)"
-        if range_est:
-            entry += f"\n  - Range: {range_est}"
-        if bearing and bearing != "Unknown":
-            entry += f", Bearing: {bearing}"
-        if motion and motion != "Unknown":
-            entry += f"\n  - Motion: {motion}"
-        if weapons:
-            entry += f"\n  - Weapons: {', '.join(weapons) if isinstance(weapons, list) else weapons}"
-        if weapon_ready and weapon_ready != "Unknown":
-            entry += f" ({weapon_ready})"
-        if intent:
-            entry += f"\n  - Assessed Intent: {intent}"
-        # Append dynamic features
-        for feat in dynamic_features:
-            if isinstance(feat, dict):
-                key = feat.get("key", "")
-                value = feat.get("value", "")
-                if key and value:
-                    entry += f"\n  - {key}: {value}"
-        lines.append(entry)
-    return "\n\n".join(lines) if lines else "No threat data available."