Zhen Ye committed on
Commit
8094b21
·
1 Parent(s): 0834617

feat(threat-assessment): implement naval threat analysis with GPT-4o

- Rename utils/gpt_distance.py to utils/gpt_reasoning.py and update logic for 15 naval threat features
- Add Pydantic schemas for NavalThreatAssessment in utils/schemas.py
- Update backend (app.py, inference.py) to use new threat estimation and pass full metadata
- refactor(frontend): render threat level badges and detailed feature table in UI

Browse files
app.py CHANGED
@@ -55,7 +55,7 @@ from jobs.storage import (
55
  get_job_storage,
56
  get_output_video_path,
57
  )
58
- from utils.gpt_distance import estimate_distance_gpt
59
 
60
  logging.basicConfig(level=logging.INFO)
61
 
@@ -676,8 +676,8 @@ async def reason_track(
676
  # This is blocking, but that's expected for this endpoint structure.
677
  # For high concurrency, might want to offload to threadpool or async wrapper.
678
  try:
679
- # estimate_distance_gpt reads the file from disk
680
- results = await asyncio.to_thread(estimate_distance_gpt, input_path, track_list)
681
  logging.info(f"GPT Output for Video Track Update:\n{results}")
682
  except Exception as e:
683
  logging.exception("GPT reasoning failed")
 
55
  get_job_storage,
56
  get_output_video_path,
57
  )
58
+ from utils.gpt_reasoning import estimate_threat_gpt
59
 
60
  logging.basicConfig(level=logging.INFO)
61
 
 
676
  # This is blocking, but that's expected for this endpoint structure.
677
  # For high concurrency, might want to offload to threadpool or async wrapper.
678
  try:
679
+ # estimate_threat_gpt reads the file from disk
680
+ results = await asyncio.to_thread(estimate_threat_gpt, input_path, track_list)
681
  logging.info(f"GPT Output for Video Track Update:\n{results}")
682
  except Exception as e:
683
  logging.exception("GPT reasoning failed")
frontend/js/main.js CHANGED
@@ -125,7 +125,7 @@ document.addEventListener("DOMContentLoaded", () => {
125
 
126
  // Auto-enable demo mode if filename contains "demo" or helicopter video
127
  const shouldEnableDemo = file.name.toLowerCase().includes("demo") ||
128
- file.name.toLowerCase().includes("enhance_video_movement");
129
  if (shouldEnableDemo && APP.core.demo.data) {
130
  enableDemo(true);
131
  log("Auto-enabled DEMO mode for this video.", "g");
@@ -544,7 +544,20 @@ document.addEventListener("DOMContentLoaded", () => {
544
  score: d.score || 0.5,
545
  bbox,
546
  aim: { ...ap },
547
- features: {},
 
 
 
 
 
 
 
 
 
 
 
 
 
548
  baseRange_m: null,
549
  baseAreaFrac: (bbox.w * bbox.h) / (state.frame.w * state.frame.h),
550
  baseDwell_s: 5.0,
@@ -557,7 +570,11 @@ document.addEventListener("DOMContentLoaded", () => {
557
  depth_valid: d.depth_valid ?? false,
558
  gpt_distance_m: d.gpt_distance_m || null,
559
  gpt_direction: d.gpt_direction || null,
560
- gpt_description: d.gpt_description || null
 
 
 
 
561
  };
562
  });
563
 
 
125
 
126
  // Auto-enable demo mode if filename contains "demo" or helicopter video
127
  const shouldEnableDemo = file.name.toLowerCase().includes("demo") ||
128
+ file.name.toLowerCase().includes("enhance_video_movement");
129
  if (shouldEnableDemo && APP.core.demo.data) {
130
  enableDemo(true);
131
  log("Auto-enabled DEMO mode for this video.", "g");
 
544
  score: d.score || 0.5,
545
  bbox,
546
  aim: { ...ap },
547
+ aim: { ...ap },
548
+ features: d.gpt_raw ? {
549
+ "Vessel Class": d.gpt_raw.specific_class || d.gpt_raw.vessel_category || "Unknown",
550
+ "Threat Lvl": d.gpt_raw.threat_level_score + "/10",
551
+ "Status": d.gpt_raw.threat_classification || "?",
552
+ "Weapons": (d.gpt_raw.visible_weapons || []).join(", ") || "None Visible",
553
+ "Readiness": d.gpt_raw.weapon_readiness || "Unknown",
554
+ "Motion": d.gpt_raw.motion_status || "Unknown",
555
+ "Sensors": (d.gpt_raw.sensor_profile || []).join(", ") || "None",
556
+ "Flags/ID": (d.gpt_raw.identity_markers || []).join(", ") || (d.gpt_raw.flag_state || "Unknown"),
557
+ "Activity": d.gpt_raw.deck_activity || "None",
558
+ "Range": (d.gpt_raw.range_estimation_nm ? d.gpt_raw.range_estimation_nm + " NM" : "Unknown"),
559
+ "Wake": d.gpt_raw.wake_description || "None"
560
+ } : {},
561
  baseRange_m: null,
562
  baseAreaFrac: (bbox.w * bbox.h) / (state.frame.w * state.frame.h),
563
  baseDwell_s: 5.0,
 
570
  depth_valid: d.depth_valid ?? false,
571
  gpt_distance_m: d.gpt_distance_m || null,
572
  gpt_direction: d.gpt_direction || null,
573
+ gpt_description: d.gpt_description || null,
574
+ // New Threat Intelligence
575
+ threat_level_score: d.threat_level_score || 0,
576
+ threat_classification: d.threat_classification || "Unknown",
577
+ weapon_readiness: d.weapon_readiness || "Unknown"
578
  };
579
  });
580
 
frontend/js/ui/cards.js CHANGED
@@ -52,7 +52,10 @@ APP.ui.cards.renderFrameTrackList = function () {
52
  card.innerHTML = `
53
  <div class="track-card-header">
54
  <span>${id} · ${det.label}</span>
55
- <span class="badgemini">${(det.score * 100).toFixed(0)}%</span>
 
 
 
56
  </div>
57
  <div class="track-card-meta">
58
  RANGE: ${rangeStr} | BEARING: ${bearingStr}
 
52
  card.innerHTML = `
53
  <div class="track-card-header">
54
  <span>${id} · ${det.label}</span>
55
+ <div style="display:flex; gap:4px">
56
+ ${det.threat_level_score > 0 ? `<span class="badgemini" style="background:${det.threat_level_score >= 8 ? '#ff4d4d' : '#ff9f43'}; color:white">T-${det.threat_level_score}</span>` : ''}
57
+ <span class="badgemini">${(det.score * 100).toFixed(0)}%</span>
58
+ </div>
59
  </div>
60
  <div class="track-card-meta">
61
  RANGE: ${rangeStr} | BEARING: ${bearingStr}
inference.py CHANGED
@@ -22,7 +22,7 @@ from models.segmenters.model_loader import load_segmenter, load_segmenter_on_dev
22
  from models.depth_estimators.model_loader import load_depth_estimator, load_depth_estimator_on_device
23
  from models.depth_estimators.base import DepthEstimator
24
  from utils.video import extract_frames, write_video, VideoReader, VideoWriter, AsyncVideoReader
25
- from utils.gpt_distance import estimate_distance_gpt
26
  from jobs.storage import set_track_data
27
  import tempfile
28
 
@@ -768,7 +768,7 @@ def process_first_frame(
768
  try:
769
  with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp_img:
770
  cv2.imwrite(tmp_img.name, frame)
771
- gpt_results = estimate_distance_gpt(tmp_img.name, detections)
772
  logging.info(f"GPT Output for First Frame:\n{gpt_results}") # Expose to HF logs
773
  os.remove(tmp_img.name) # Clean up immediately
774
 
@@ -783,7 +783,12 @@ def process_first_frame(
783
  det["gpt_distance_m"] = info.get("distance_m")
784
  det["gpt_direction"] = info.get("direction")
785
  det["gpt_description"] = info.get("description")
786
- # GPT is the sole source of distance - no polyfill needed
 
 
 
 
 
787
 
788
  except Exception as e:
789
  logging.error(f"GPT Distance estimation failed: {e}")
 
22
  from models.depth_estimators.model_loader import load_depth_estimator, load_depth_estimator_on_device
23
  from models.depth_estimators.base import DepthEstimator
24
  from utils.video import extract_frames, write_video, VideoReader, VideoWriter, AsyncVideoReader
25
+ from utils.gpt_reasoning import estimate_threat_gpt
26
  from jobs.storage import set_track_data
27
  import tempfile
28
 
 
768
  try:
769
  with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp_img:
770
  cv2.imwrite(tmp_img.name, frame)
771
+ gpt_results = estimate_threat_gpt(tmp_img.name, detections)
772
  logging.info(f"GPT Output for First Frame:\n{gpt_results}") # Expose to HF logs
773
  os.remove(tmp_img.name) # Clean up immediately
774
 
 
783
  det["gpt_distance_m"] = info.get("distance_m")
784
  det["gpt_direction"] = info.get("direction")
785
  det["gpt_description"] = info.get("description")
786
+ # Threat Intelligence
787
+ det["threat_level_score"] = info.get("threat_level_score")
788
+ det["threat_classification"] = info.get("threat_classification")
789
+ det["weapon_readiness"] = info.get("weapon_readiness")
790
+ # Full Metadata for Feature Table
791
+ det["gpt_raw"] = info
792
 
793
  except Exception as e:
794
  logging.error(f"GPT Distance estimation failed: {e}")
utils/{gpt_distance.py → gpt_reasoning.py} RENAMED
@@ -5,6 +5,7 @@ import logging
5
  from typing import List, Dict, Any, Optional
6
  import urllib.request
7
  import urllib.error
 
8
 
9
  logger = logging.getLogger(__name__)
10
 
@@ -12,34 +13,33 @@ def encode_image(image_path: str) -> str:
12
  with open(image_path, "rb") as image_file:
13
  return base64.b64encode(image_file.read()).decode('utf-8')
14
 
15
- def estimate_distance_gpt(
16
  image_path: str,
17
  detections: List[Dict[str, Any]]
18
  ) -> Dict[str, Any]:
19
  """
20
- Estimate distance and direction for detected objects using GPT-4o.
21
 
22
  Args:
23
  image_path: Path to the image file.
24
  detections: List of detection dicts (bbox, label, etc.).
25
 
26
  Returns:
27
- Dict mapping object ID (e.g., T01) to distance/direction info.
28
  """
29
  api_key = os.environ.get("OPENAI_API_KEY")
30
  if not api_key:
31
- logger.warning("OPENAI_API_KEY not set. Skipping GPT distance estimation.")
32
  return {}
33
 
34
  # 1. Prepare detections summary for prompt
35
- # We assign temporary IDs here if they don't exist, to match what we send to GPT
36
  det_summary = []
37
  for i, det in enumerate(detections):
38
  # UI uses T01, T02... logic usually matches index + 1
39
  obj_id = f"T{str(i+1).zfill(2)}"
40
  bbox = det.get("bbox", [])
41
  label = det.get("label", "object")
42
- det_summary.append(f"- ID: {obj_id}, Label: {label}, BBox: {bbox}")
43
 
44
  det_text = "\n".join(det_summary)
45
 
@@ -53,34 +53,50 @@ def estimate_distance_gpt(
53
  logger.error(f"Failed to encode image for GPT: {e}")
54
  return {}
55
 
56
- # 3. Construct Prompt
57
  system_prompt = (
58
- "You are an expert perception system for an autonomous vehicle or surveillance system. "
59
- "Your task is to estimate the distance (in meters) and direction (relative to the camera) of detected objects in an image. "
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  "ASSUMPTIONS:\n"
61
- "- The camera is mounted at a standard height (approx 1.5 - 2.0 meters).\n"
62
- "- Standard field of view (~60-90 degrees).\n"
63
- "- Typical object sizes: Person ~1.7m tall, Car ~1.8m wide, Truck ~2.5m wide.\n"
64
- "OUTPUT FORMAT:\n"
65
- "Return STRICT JSON ONLY. Do not include markdown formatting (```json ... ```). "
66
- "The JSON must be an object with a key 'objects' containing a list. "
67
- "Each item in `objects` must have:\n"
68
- "- `id`: The object ID provided in the input.\n"
69
- "- `distance_m`: Estimated distance in meters (float).\n"
70
- "- `direction`: Direction description (e.g., '12 o\\'clock', '1 o\\'clock', '10 o\\'clock'). "
71
- "Assume 12 o'clock is straight ahead.\n"
72
- "- `description`: Brief visual description (e.g., 'Red sedan moving away').\n"
73
  )
74
 
75
  user_prompt = (
76
- f"Analyze this image. The following objects have been detected with bounding boxes [x1, y1, x2, y2]:\n"
77
  f"{det_text}\n\n"
78
- "Provide distance and direction estimates for these objects based on their size and position in the scene."
79
  )
80
 
81
  # 4. Call API
82
  payload = {
83
- "model": "gpt-4o-mini",
84
  "messages": [
85
  {
86
  "role": "system",
@@ -102,8 +118,8 @@ def estimate_distance_gpt(
102
  ]
103
  }
104
  ],
105
- "max_tokens": 1000,
106
- "temperature": 0.2,
107
  "response_format": { "type": "json_object" }
108
  }
109
 
@@ -127,20 +143,38 @@ def estimate_distance_gpt(
127
  logger.warning("GPT returned empty content. Full response: %s", resp_data)
128
  return {}
129
 
130
- # Clean potential markdown headers if GPT ignores instruction
131
- if content.startswith("```json"):
132
- content = content[7:]
133
- if content.endswith("```"):
134
- content = content[:-3]
135
-
136
  result_json = json.loads(content)
137
 
138
- # Map back to a dict: {ID: {data}}
139
- mapped_results = {}
140
- for obj in result_json.get("objects", []):
141
- mapped_results[obj["id"]] = obj
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
 
143
- return mapped_results
 
 
 
144
 
145
  except Exception as e:
146
  logger.error(f"GPT API call failed: {e}")
 
5
  from typing import List, Dict, Any, Optional
6
  import urllib.request
7
  import urllib.error
8
+ from utils.schemas import FrameThreatAnalysis
9
 
10
  logger = logging.getLogger(__name__)
11
 
 
13
  with open(image_path, "rb") as image_file:
14
  return base64.b64encode(image_file.read()).decode('utf-8')
15
 
16
+ def estimate_threat_gpt(
17
  image_path: str,
18
  detections: List[Dict[str, Any]]
19
  ) -> Dict[str, Any]:
20
  """
21
+ Perform Naval Threat Assessment on detected objects using GPT-4o.
22
 
23
  Args:
24
  image_path: Path to the image file.
25
  detections: List of detection dicts (bbox, label, etc.).
26
 
27
  Returns:
28
+ Dict mapping object ID (e.g., T01) to NavalThreatAssessment dict.
29
  """
30
  api_key = os.environ.get("OPENAI_API_KEY")
31
  if not api_key:
32
+ logger.warning("OPENAI_API_KEY not set. Skipping GPT threat assessment.")
33
  return {}
34
 
35
  # 1. Prepare detections summary for prompt
 
36
  det_summary = []
37
  for i, det in enumerate(detections):
38
  # UI uses T01, T02... logic usually matches index + 1
39
  obj_id = f"T{str(i+1).zfill(2)}"
40
  bbox = det.get("bbox", [])
41
  label = det.get("label", "object")
42
+ det_summary.append(f"- ID: {obj_id}, Classification Hint: {label}, BBox: {bbox}")
43
 
44
  det_text = "\n".join(det_summary)
45
 
 
53
  logger.error(f"Failed to encode image for GPT: {e}")
54
  return {}
55
 
56
+ # 3. Construct Prompt (Naval Focused)
57
  system_prompt = (
58
+ "You are an elite Naval Intelligence Officer and Threat Analyst. "
59
+ "Your task is to analyze optical surveillance imagery and provide a detailed tactical assessment for every detected object. "
60
+ "You must output a STRICT JSON object that matches the following schema for every object ID provided:\n\n"
61
+ "RESPONSE SCHEMA (JSON):\n"
62
+ "{\n"
63
+ " \"objects\": {\n"
64
+ " \"T01\": {\n"
65
+ " \"vessel_category\": \"Warship\" | \"Commercial\" | \"Fishing\" | \"Small Boat\" | \"Aircraft\" | \"Unknown\",\n"
66
+ " \"specific_class\": \"string (e.g., Arleigh Burke, Skiff)\",\n"
67
+ " \"identity_markers\": [\"string (hull numbers, flags)\"],\n"
68
+ " \"flag_state\": \"string (Country)\",\n"
69
+ " \"visible_weapons\": [\"string\"],\n"
70
+ " \"weapon_readiness\": \"Stowed/PEACE\" | \"Trained/Aiming\" | \"Firing/HOSTILE\",\n"
71
+ " \"sensor_profile\": [\"string (radars)\"],\n"
72
+ " \"motion_status\": \"Dead in Water\" | \"Underway Slow\" | \"Underway Fast\" | \"Flank Speed\",\n"
73
+ " \"wake_description\": \"string\",\n"
74
+ " \"aspect\": \"Bow-on\" | \"Stern-on\" | \"Broadside\",\n"
75
+ " \"range_estimation_nm\": float (Nautical Miles),\n"
76
+ " \"bearing_clock\": \"string (e.g. 12 o'clock)\",\n"
77
+ " \"deck_activity\": \"string\",\n"
78
+ " \"special_features\": [\"string (anomalies)\"],\n"
79
+ " \"threat_level_score\": int (1-10),\n"
80
+ " \"threat_classification\": \"Friendly\" | \"Neutral\" | \"Suspect\" | \"Hostile\",\n"
81
+ " \"tactical_intent\": \"string (e.g., Transit, Attack)\"\n"
82
+ " }\n"
83
+ " }\n"
84
+ "}\n\n"
85
  "ASSUMPTIONS:\n"
86
+ "- Unknown small boats approaching larger vessels are HIGH threat (Suspect/Hostile).\n"
87
+ "- Visible trained weapons are IMMINENT threat (Score 9-10).\n"
88
+ "- Ignore artifacts, focus on the objects."
 
 
 
 
 
 
 
 
 
89
  )
90
 
91
  user_prompt = (
92
+ f"Analyze this naval surveillance image. The following objects have been detected:\n"
93
  f"{det_text}\n\n"
94
+ "Provide a detailed Naval Threat Assessment for each object based on its visual signatures."
95
  )
96
 
97
  # 4. Call API
98
  payload = {
99
+ "model": "gpt-4o", # Use 4o for better vision analysis
100
  "messages": [
101
  {
102
  "role": "system",
 
118
  ]
119
  }
120
  ],
121
+ "max_tokens": 1500,
122
+ "temperature": 0.2, # Low temp for factual consistency
123
  "response_format": { "type": "json_object" }
124
  }
125
 
 
143
  logger.warning("GPT returned empty content. Full response: %s", resp_data)
144
  return {}
145
 
 
 
 
 
 
 
146
  result_json = json.loads(content)
147
 
148
+ objects = result_json.get("objects", {})
149
+
150
+ # Polyfill legacy fields for frontend compatibility
151
+ for obj_id, data in objects.items():
152
+ # 1. Distance: NM -> Meters
153
+ if "range_estimation_nm" in data:
154
+ data["distance_m"] = data["range_estimation_nm"] * 1852.0
155
+ data["gpt_distance_m"] = data["distance_m"] # Explicit legacy key
156
+
157
+ # 2. Direction
158
+ if "bearing_clock" in data:
159
+ data["direction"] = data["bearing_clock"]
160
+ data["gpt_direction"] = data["bearing_clock"]
161
+
162
+ # 3. Description (Summary of new complex fields)
163
+ # e.g. "Warship (Arleigh Burke) - THREAT: 9"
164
+ category = data.get("vessel_category", "Unknown")
165
+ spec = data.get("specific_class", "")
166
+ threat = data.get("threat_classification", "Unknown")
167
+ score = data.get("threat_level_score", 0)
168
+
169
+ desc_parts = [category]
170
+ if spec:
171
+ desc_parts.append(f"({spec})")
172
+ desc_parts.append(f"[{threat.upper()} Lvl:{score}]")
173
 
174
+ data["description"] = " ".join(desc_parts)
175
+ data["gpt_description"] = data["description"]
176
+
177
+ return objects
178
 
179
  except Exception as e:
180
  logger.error(f"GPT API call failed: {e}")
utils/schemas.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel, Field
2
+ from typing import List, Optional, Literal
3
+
4
+ class NavalThreatAssessment(BaseModel):
5
+ """
6
+ Tactical threat assessment for a detected object in a maritime environment.
7
+ """
8
+ # 1. Classification
9
+ vessel_category: Literal["Warship", "Commercial", "Fishing", "Recreational", "Small Boat", "Aircraft", "Unknown"] = Field(..., description="Broad category of the vessel/object.")
10
+ specific_class: Optional[str] = Field(None, description="Specific class if identifiable (e.g., 'Arleigh Burke', 'Dhow', 'Skiff').")
11
+
12
+ # 2. Identification
13
+ identity_markers: List[str] = Field(default_factory=list, description="Visible identifiers: Hull Numbers, Names, Flags, Funnel markings.")
14
+ flag_state: Optional[str] = Field(None, description="Country of origin based on flag or markings.")
15
+
16
+ # 3. Capabilities & Weapons
17
+ visible_weapons: List[str] = Field(default_factory=list, description="Visible weaponry: 'Deck Gun', 'VLS', 'Torpedo Tubes', 'Crew Served Weapons'.")
18
+ weapon_readiness: Literal["Stowed/PEACE", "Manned/Tens", "Trained/Aiming", "Firing/HOSTILE", "Unknown"] = Field(..., description="State of visible weapons.")
19
+
20
+ # 4. Sensors & Electronics
21
+ sensor_profile: List[str] = Field(default_factory=list, description="Visible sensors: 'Rotating Search Radar', 'Fire Control Director', 'Dome'.")
22
+
23
+ # 5. Kinematics
24
+ motion_status: Literal["Dead in Water", "Stationary/Anchored", "Underway Slow", "Underway Fast", "Flank Speed"] = Field(..., description="Movement status based on wake and bow wave.")
25
+ wake_description: Optional[str] = Field(None, description="Description of the wake (e.g., 'Large turbulent wake', 'No wake').")
26
+
27
+ # 6. Spatial / Geometry
28
+ aspect: str = Field(..., description="Target aspect relative to sensor: 'Bow-on', 'Stern-on', 'Broadside Port', 'Broadside Starboard'.")
29
+ range_estimation_nm: float = Field(..., description="Estimated range in Nautical Miles.")
30
+ bearing_clock: str = Field(..., description="Relative bearing in clock format (12 o'clock = Bow).")
31
+
32
+ # 7. Operational Context
33
+ deck_activity: str = Field("None", description="Activity on deck: 'Flight Ops', 'Cargo Handling', 'Personnel gathering', 'Empty'.")
34
+ special_features: List[str] = Field(default_factory=list, description="Anomalies: 'Rust streaks', 'Camouflage', 'Antenna forest', 'RHIBs on davits'.")
35
+
36
+ # 8. Threat Assessment
37
+ threat_level_score: int = Field(..., ge=1, le=10, description="1-10 Threat Score (1=Benign, 10=Imminent Attack).")
38
+ threat_classification: Literal["Friendly", "Neutral", "Suspect", "Hostile"] = Field(..., description="Tactical classification.")
39
+ tactical_intent: str = Field(..., description="Inferred intent: 'Transit', 'Intelligence Gathering', 'Harassment', 'Attack Profile'.")
40
+
41
+ class FrameThreatAnalysis(BaseModel):
42
+ objects: dict[str, NavalThreatAssessment] = Field(..., description="Map of Object ID (e.g., 'T01') to its assessment.")