Zhen Ye committed on
Commit
8094b21
·
1 Parent(s): 0834617

feat(threat-assessment): implement naval threat analysis with GPT-4o

- Rename utils/gpt_distance.py to utils/gpt_reasoning.py and update logic for 15 naval threat features
- Add Pydantic schemas for NavalThreatAssessment in utils/schemas.py
- Update backend (app.py, inference.py) to use new threat estimation and pass full metadata
- refactor(frontend): render threat level badges and detailed feature table in UI

Browse files
app.py CHANGED
@@ -55,7 +55,7 @@ from jobs.storage import (
55
  get_job_storage,
56
  get_output_video_path,
57
  )
58
- from utils.gpt_distance import estimate_distance_gpt
59
 
60
  logging.basicConfig(level=logging.INFO)
61
 
@@ -676,8 +676,8 @@ async def reason_track(
676
  # This is blocking, but that's expected for this endpoint structure.
677
  # For high concurrency, might want to offload to threadpool or async wrapper.
678
  try:
679
- # estimate_distance_gpt reads the file from disk
680
- results = await asyncio.to_thread(estimate_distance_gpt, input_path, track_list)
681
  logging.info(f"GPT Output for Video Track Update:\n{results}")
682
  except Exception as e:
683
  logging.exception("GPT reasoning failed")
 
55
  get_job_storage,
56
  get_output_video_path,
57
  )
58
+ from utils.gpt_reasoning import estimate_threat_gpt
59
 
60
  logging.basicConfig(level=logging.INFO)
61
 
 
676
  # This is blocking, but that's expected for this endpoint structure.
677
  # For high concurrency, might want to offload to threadpool or async wrapper.
678
  try:
679
+ # estimate_threat_gpt reads the file from disk
680
+ results = await asyncio.to_thread(estimate_threat_gpt, input_path, track_list)
681
  logging.info(f"GPT Output for Video Track Update:\n{results}")
682
  except Exception as e:
683
  logging.exception("GPT reasoning failed")
frontend/js/main.js CHANGED
@@ -125,7 +125,7 @@ document.addEventListener("DOMContentLoaded", () => {
125
 
126
  // Auto-enable demo mode if filename contains "demo" or helicopter video
127
  const shouldEnableDemo = file.name.toLowerCase().includes("demo") ||
128
- file.name.toLowerCase().includes("enhance_video_movement");
129
  if (shouldEnableDemo && APP.core.demo.data) {
130
  enableDemo(true);
131
  log("Auto-enabled DEMO mode for this video.", "g");
@@ -544,7 +544,20 @@ document.addEventListener("DOMContentLoaded", () => {
544
  score: d.score || 0.5,
545
  bbox,
546
  aim: { ...ap },
547
- features: {},
 
 
 
 
 
 
 
 
 
 
 
 
 
548
  baseRange_m: null,
549
  baseAreaFrac: (bbox.w * bbox.h) / (state.frame.w * state.frame.h),
550
  baseDwell_s: 5.0,
@@ -557,7 +570,11 @@ document.addEventListener("DOMContentLoaded", () => {
557
  depth_valid: d.depth_valid ?? false,
558
  gpt_distance_m: d.gpt_distance_m || null,
559
  gpt_direction: d.gpt_direction || null,
560
- gpt_description: d.gpt_description || null
 
 
 
 
561
  };
562
  });
563
 
 
125
 
126
  // Auto-enable demo mode if filename contains "demo" or helicopter video
127
  const shouldEnableDemo = file.name.toLowerCase().includes("demo") ||
128
+ file.name.toLowerCase().includes("enhance_video_movement");
129
  if (shouldEnableDemo && APP.core.demo.data) {
130
  enableDemo(true);
131
  log("Auto-enabled DEMO mode for this video.", "g");
 
544
  score: d.score || 0.5,
545
  bbox,
546
  aim: { ...ap },
547
+ aim: { ...ap },
548
+ features: d.gpt_raw ? {
549
+ "Vessel Class": d.gpt_raw.specific_class || d.gpt_raw.vessel_category || "Unknown",
550
+ "Threat Lvl": d.gpt_raw.threat_level_score + "/10",
551
+ "Status": d.gpt_raw.threat_classification || "?",
552
+ "Weapons": (d.gpt_raw.visible_weapons || []).join(", ") || "None Visible",
553
+ "Readiness": d.gpt_raw.weapon_readiness || "Unknown",
554
+ "Motion": d.gpt_raw.motion_status || "Unknown",
555
+ "Sensors": (d.gpt_raw.sensor_profile || []).join(", ") || "None",
556
+ "Flags/ID": (d.gpt_raw.identity_markers || []).join(", ") || (d.gpt_raw.flag_state || "Unknown"),
557
+ "Activity": d.gpt_raw.deck_activity || "None",
558
+ "Range": (d.gpt_raw.range_estimation_nm ? d.gpt_raw.range_estimation_nm + " NM" : "Unknown"),
559
+ "Wake": d.gpt_raw.wake_description || "None"
560
+ } : {},
561
  baseRange_m: null,
562
  baseAreaFrac: (bbox.w * bbox.h) / (state.frame.w * state.frame.h),
563
  baseDwell_s: 5.0,
 
570
  depth_valid: d.depth_valid ?? false,
571
  gpt_distance_m: d.gpt_distance_m || null,
572
  gpt_direction: d.gpt_direction || null,
573
+ gpt_description: d.gpt_description || null,
574
+ // New Threat Intelligence
575
+ threat_level_score: d.threat_level_score || 0,
576
+ threat_classification: d.threat_classification || "Unknown",
577
+ weapon_readiness: d.weapon_readiness || "Unknown"
578
  };
579
  });
580
 
frontend/js/ui/cards.js CHANGED
@@ -52,7 +52,10 @@ APP.ui.cards.renderFrameTrackList = function () {
52
  card.innerHTML = `
53
  <div class="track-card-header">
54
  <span>${id} · ${det.label}</span>
55
- <span class="badgemini">${(det.score * 100).toFixed(0)}%</span>
 
 
 
56
  </div>
57
  <div class="track-card-meta">
58
  RANGE: ${rangeStr} | BEARING: ${bearingStr}
 
52
  card.innerHTML = `
53
  <div class="track-card-header">
54
  <span>${id} · ${det.label}</span>
55
+ <div style="display:flex; gap:4px">
56
+ ${det.threat_level_score > 0 ? `<span class="badgemini" style="background:${det.threat_level_score >= 8 ? '#ff4d4d' : '#ff9f43'}; color:white">T-${det.threat_level_score}</span>` : ''}
57
+ <span class="badgemini">${(det.score * 100).toFixed(0)}%</span>
58
+ </div>
59
  </div>
60
  <div class="track-card-meta">
61
  RANGE: ${rangeStr} | BEARING: ${bearingStr}
inference.py CHANGED
@@ -22,7 +22,7 @@ from models.segmenters.model_loader import load_segmenter, load_segmenter_on_dev
22
  from models.depth_estimators.model_loader import load_depth_estimator, load_depth_estimator_on_device
23
  from models.depth_estimators.base import DepthEstimator
24
  from utils.video import extract_frames, write_video, VideoReader, VideoWriter, AsyncVideoReader
25
- from utils.gpt_distance import estimate_distance_gpt
26
  from jobs.storage import set_track_data
27
  import tempfile
28
 
@@ -768,7 +768,7 @@ def process_first_frame(
768
  try:
769
  with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp_img:
770
  cv2.imwrite(tmp_img.name, frame)
771
- gpt_results = estimate_distance_gpt(tmp_img.name, detections)
772
  logging.info(f"GPT Output for First Frame:\n{gpt_results}") # Expose to HF logs
773
  os.remove(tmp_img.name) # Clean up immediately
774
 
@@ -783,7 +783,12 @@ def process_first_frame(
783
  det["gpt_distance_m"] = info.get("distance_m")
784
  det["gpt_direction"] = info.get("direction")
785
  det["gpt_description"] = info.get("description")
786
- # GPT is the sole source of distance - no polyfill needed
 
 
 
 
 
787
 
788
  except Exception as e:
789
  logging.error(f"GPT Distance estimation failed: {e}")
 
22
  from models.depth_estimators.model_loader import load_depth_estimator, load_depth_estimator_on_device
23
  from models.depth_estimators.base import DepthEstimator
24
  from utils.video import extract_frames, write_video, VideoReader, VideoWriter, AsyncVideoReader
25
+ from utils.gpt_reasoning import estimate_threat_gpt
26
  from jobs.storage import set_track_data
27
  import tempfile
28
 
 
768
  try:
769
  with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp_img:
770
  cv2.imwrite(tmp_img.name, frame)
771
+ gpt_results = estimate_threat_gpt(tmp_img.name, detections)
772
  logging.info(f"GPT Output for First Frame:\n{gpt_results}") # Expose to HF logs
773
  os.remove(tmp_img.name) # Clean up immediately
774
 
 
783
  det["gpt_distance_m"] = info.get("distance_m")
784
  det["gpt_direction"] = info.get("direction")
785
  det["gpt_description"] = info.get("description")
786
+ # Threat Intelligence
787
+ det["threat_level_score"] = info.get("threat_level_score")
788
+ det["threat_classification"] = info.get("threat_classification")
789
+ det["weapon_readiness"] = info.get("weapon_readiness")
790
+ # Full Metadata for Feature Table
791
+ det["gpt_raw"] = info
792
 
793
  except Exception as e:
794
  logging.error(f"GPT Distance estimation failed: {e}")
utils/{gpt_distance.py → gpt_reasoning.py} RENAMED
@@ -5,6 +5,7 @@ import logging
5
  from typing import List, Dict, Any, Optional
6
  import urllib.request
7
  import urllib.error
 
8
 
9
  logger = logging.getLogger(__name__)
10
 
@@ -12,34 +13,33 @@ def encode_image(image_path: str) -> str:
12
  with open(image_path, "rb") as image_file:
13
  return base64.b64encode(image_file.read()).decode('utf-8')
14
 
15
- def estimate_distance_gpt(
16
  image_path: str,
17
  detections: List[Dict[str, Any]]
18
  ) -> Dict[str, Any]:
19
  """
20
- Estimate distance and direction for detected objects using GPT-4o.
21
 
22
  Args:
23
  image_path: Path to the image file.
24
  detections: List of detection dicts (bbox, label, etc.).
25
 
26
  Returns:
27
- Dict mapping object ID (e.g., T01) to distance/direction info.
28
  """
29
  api_key = os.environ.get("OPENAI_API_KEY")
30
  if not api_key:
31
- logger.warning("OPENAI_API_KEY not set. Skipping GPT distance estimation.")
32
  return {}
33
 
34
  # 1. Prepare detections summary for prompt
35
- # We assign temporary IDs here if they don't exist, to match what we send to GPT
36
  det_summary = []
37
  for i, det in enumerate(detections):
38
  # UI uses T01, T02... logic usually matches index + 1
39
  obj_id = f"T{str(i+1).zfill(2)}"
40
  bbox = det.get("bbox", [])
41
  label = det.get("label", "object")
42
- det_summary.append(f"- ID: {obj_id}, Label: {label}, BBox: {bbox}")
43
 
44
  det_text = "\n".join(det_summary)
45
 
@@ -53,34 +53,50 @@ def estimate_distance_gpt(
53
  logger.error(f"Failed to encode image for GPT: {e}")
54
  return {}
55
 
56
- # 3. Construct Prompt
57
  system_prompt = (
58
- "You are an expert perception system for an autonomous vehicle or surveillance system. "
59
- "Your task is to estimate the distance (in meters) and direction (relative to the camera) of detected objects in an image. "
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  "ASSUMPTIONS:\n"
61
- "- The camera is mounted at a standard height (approx 1.5 - 2.0 meters).\n"
62
- "- Standard field of view (~60-90 degrees).\n"
63
- "- Typical object sizes: Person ~1.7m tall, Car ~1.8m wide, Truck ~2.5m wide.\n"
64
- "OUTPUT FORMAT:\n"
65
- "Return STRICT JSON ONLY. Do not include markdown formatting (```json ... ```). "
66
- "The JSON must be an object with a key 'objects' containing a list. "
67
- "Each item in `objects` must have:\n"
68
- "- `id`: The object ID provided in the input.\n"
69
- "- `distance_m`: Estimated distance in meters (float).\n"
70
- "- `direction`: Direction description (e.g., '12 o\\'clock', '1 o\\'clock', '10 o\\'clock'). "
71
- "Assume 12 o'clock is straight ahead.\n"
72
- "- `description`: Brief visual description (e.g., 'Red sedan moving away').\n"
73
  )
74
 
75
  user_prompt = (
76
- f"Analyze this image. The following objects have been detected with bounding boxes [x1, y1, x2, y2]:\n"
77
  f"{det_text}\n\n"
78
- "Provide distance and direction estimates for these objects based on their size and position in the scene."
79
  )
80
 
81
  # 4. Call API
82
  payload = {
83
- "model": "gpt-4o-mini",
84
  "messages": [
85
  {
86
  "role": "system",
@@ -102,8 +118,8 @@ def estimate_distance_gpt(
102
  ]
103
  }
104
  ],
105
- "max_tokens": 1000,
106
- "temperature": 0.2,
107
  "response_format": { "type": "json_object" }
108
  }
109
 
@@ -127,20 +143,38 @@ def estimate_distance_gpt(
127
  logger.warning("GPT returned empty content. Full response: %s", resp_data)
128
  return {}
129
 
130
- # Clean potential markdown headers if GPT ignores instruction
131
- if content.startswith("```json"):
132
- content = content[7:]
133
- if content.endswith("```"):
134
- content = content[:-3]
135
-
136
  result_json = json.loads(content)
137
 
138
- # Map back to a dict: {ID: {data}}
139
- mapped_results = {}
140
- for obj in result_json.get("objects", []):
141
- mapped_results[obj["id"]] = obj
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
 
143
- return mapped_results
 
 
 
144
 
145
  except Exception as e:
146
  logger.error(f"GPT API call failed: {e}")
 
5
  from typing import List, Dict, Any, Optional
6
  import urllib.request
7
  import urllib.error
8
+ from utils.schemas import FrameThreatAnalysis
9
 
10
  logger = logging.getLogger(__name__)
11
 
 
13
  with open(image_path, "rb") as image_file:
14
  return base64.b64encode(image_file.read()).decode('utf-8')
15
 
16
+ def estimate_threat_gpt(
17
  image_path: str,
18
  detections: List[Dict[str, Any]]
19
  ) -> Dict[str, Any]:
20
  """
21
+ Perform Naval Threat Assessment on detected objects using GPT-4o.
22
 
23
  Args:
24
  image_path: Path to the image file.
25
  detections: List of detection dicts (bbox, label, etc.).
26
 
27
  Returns:
28
+ Dict mapping object ID (e.g., T01) to NavalThreatAssessment dict.
29
  """
30
  api_key = os.environ.get("OPENAI_API_KEY")
31
  if not api_key:
32
+ logger.warning("OPENAI_API_KEY not set. Skipping GPT threat assessment.")
33
  return {}
34
 
35
  # 1. Prepare detections summary for prompt
 
36
  det_summary = []
37
  for i, det in enumerate(detections):
38
  # UI uses T01, T02... logic usually matches index + 1
39
  obj_id = f"T{str(i+1).zfill(2)}"
40
  bbox = det.get("bbox", [])
41
  label = det.get("label", "object")
42
+ det_summary.append(f"- ID: {obj_id}, Classification Hint: {label}, BBox: {bbox}")
43
 
44
  det_text = "\n".join(det_summary)
45
 
 
53
  logger.error(f"Failed to encode image for GPT: {e}")
54
  return {}
55
 
56
+ # 3. Construct Prompt (Naval Focused)
57
  system_prompt = (
58
+ "You are an elite Naval Intelligence Officer and Threat Analyst. "
59
+ "Your task is to analyze optical surveillance imagery and provide a detailed tactical assessment for every detected object. "
60
+ "You must output a STRICT JSON object that matches the following schema for every object ID provided:\n\n"
61
+ "RESPONSE SCHEMA (JSON):\n"
62
+ "{\n"
63
+ " \"objects\": {\n"
64
+ " \"T01\": {\n"
65
+ " \"vessel_category\": \"Warship\" | \"Commercial\" | \"Fishing\" | \"Small Boat\" | \"Aircraft\" | \"Unknown\",\n"
66
+ " \"specific_class\": \"string (e.g., Arleigh Burke, Skiff)\",\n"
67
+ " \"identity_markers\": [\"string (hull numbers, flags)\"],\n"
68
+ " \"flag_state\": \"string (Country)\",\n"
69
+ " \"visible_weapons\": [\"string\"],\n"
70
+ " \"weapon_readiness\": \"Stowed/PEACE\" | \"Trained/Aiming\" | \"Firing/HOSTILE\",\n"
71
+ " \"sensor_profile\": [\"string (radars)\"],\n"
72
+ " \"motion_status\": \"Dead in Water\" | \"Underway Slow\" | \"Underway Fast\" | \"Flank Speed\",\n"
73
+ " \"wake_description\": \"string\",\n"
74
+ " \"aspect\": \"Bow-on\" | \"Stern-on\" | \"Broadside\",\n"
75
+ " \"range_estimation_nm\": float (Nautical Miles),\n"
76
+ " \"bearing_clock\": \"string (e.g. 12 o'clock)\",\n"
77
+ " \"deck_activity\": \"string\",\n"
78
+ " \"special_features\": [\"string (anomalies)\"],\n"
79
+ " \"threat_level_score\": int (1-10),\n"
80
+ " \"threat_classification\": \"Friendly\" | \"Neutral\" | \"Suspect\" | \"Hostile\",\n"
81
+ " \"tactical_intent\": \"string (e.g., Transit, Attack)\"\n"
82
+ " }\n"
83
+ " }\n"
84
+ "}\n\n"
85
  "ASSUMPTIONS:\n"
86
+ "- Unknown small boats approaching larger vessels are HIGH threat (Suspect/Hostile).\n"
87
+ "- Visible trained weapons are IMMINENT threat (Score 9-10).\n"
88
+ "- Ignore artifacts, focus on the objects."
 
 
 
 
 
 
 
 
 
89
  )
90
 
91
  user_prompt = (
92
+ f"Analyze this naval surveillance image. The following objects have been detected:\n"
93
  f"{det_text}\n\n"
94
+ "Provide a detailed Naval Threat Assessment for each object based on its visual signatures."
95
  )
96
 
97
  # 4. Call API
98
  payload = {
99
+ "model": "gpt-4o", # Use 4o for better vision analysis
100
  "messages": [
101
  {
102
  "role": "system",
 
118
  ]
119
  }
120
  ],
121
+ "max_tokens": 1500,
122
+ "temperature": 0.2, # Low temp for factual consistency
123
  "response_format": { "type": "json_object" }
124
  }
125
 
 
143
  logger.warning("GPT returned empty content. Full response: %s", resp_data)
144
  return {}
145
 
 
 
 
 
 
 
146
  result_json = json.loads(content)
147
 
148
+ objects = result_json.get("objects", {})
149
+
150
+ # Polyfill legacy fields for frontend compatibility
151
+ for obj_id, data in objects.items():
152
+ # 1. Distance: NM -> Meters
153
+ if "range_estimation_nm" in data:
154
+ data["distance_m"] = data["range_estimation_nm"] * 1852.0
155
+ data["gpt_distance_m"] = data["distance_m"] # Explicit legacy key
156
+
157
+ # 2. Direction
158
+ if "bearing_clock" in data:
159
+ data["direction"] = data["bearing_clock"]
160
+ data["gpt_direction"] = data["bearing_clock"]
161
+
162
+ # 3. Description (Summary of new complex fields)
163
+ # e.g. "Warship (Arleigh Burke) - THREAT: 9"
164
+ category = data.get("vessel_category", "Unknown")
165
+ spec = data.get("specific_class", "")
166
+ threat = data.get("threat_classification", "Unknown")
167
+ score = data.get("threat_level_score", 0)
168
+
169
+ desc_parts = [category]
170
+ if spec:
171
+ desc_parts.append(f"({spec})")
172
+ desc_parts.append(f"[{threat.upper()} Lvl:{score}]")
173
 
174
+ data["description"] = " ".join(desc_parts)
175
+ data["gpt_description"] = data["description"]
176
+
177
+ return objects
178
 
179
  except Exception as e:
180
  logger.error(f"GPT API call failed: {e}")
utils/schemas.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel, Field
2
+ from typing import List, Optional, Literal
3
+
4
+ class NavalThreatAssessment(BaseModel):
5
+ """
6
+ Tactical threat assessment for a detected object in a maritime environment.
7
+ """
8
+ # 1. Classification
9
+ vessel_category: Literal["Warship", "Commercial", "Fishing", "Recreational", "Small Boat", "Aircraft", "Unknown"] = Field(..., description="Broad category of the vessel/object.")
10
+ specific_class: Optional[str] = Field(None, description="Specific class if identifiable (e.g., 'Arleigh Burke', 'Dhow', 'Skiff').")
11
+
12
+ # 2. Identification
13
+ identity_markers: List[str] = Field(default_factory=list, description="Visible identifiers: Hull Numbers, Names, Flags, Funnel markings.")
14
+ flag_state: Optional[str] = Field(None, description="Country of origin based on flag or markings.")
15
+
16
+ # 3. Capabilities & Weapons
17
+ visible_weapons: List[str] = Field(default_factory=list, description="Visible weaponry: 'Deck Gun', 'VLS', 'Torpedo Tubes', 'Crew Served Weapons'.")
18
+ weapon_readiness: Literal["Stowed/PEACE", "Manned/Tens", "Trained/Aiming", "Firing/HOSTILE", "Unknown"] = Field(..., description="State of visible weapons.")
19
+
20
+ # 4. Sensors & Electronics
21
+ sensor_profile: List[str] = Field(default_factory=list, description="Visible sensors: 'Rotating Search Radar', 'Fire Control Director', 'Dome'.")
22
+
23
+ # 5. Kinematics
24
+ motion_status: Literal["Dead in Water", "Stationary/Anchored", "Underway Slow", "Underway Fast", "Flank Speed"] = Field(..., description="Movement status based on wake and bow wave.")
25
+ wake_description: Optional[str] = Field(None, description="Description of the wake (e.g., 'Large turbulent wake', 'No wake').")
26
+
27
+ # 6. Spatial / Geometry
28
+ aspect: str = Field(..., description="Target aspect relative to sensor: 'Bow-on', 'Stern-on', 'Broadside Port', 'Broadside Starboard'.")
29
+ range_estimation_nm: float = Field(..., description="Estimated range in Nautical Miles.")
30
+ bearing_clock: str = Field(..., description="Relative bearing in clock format (12 o'clock = Bow).")
31
+
32
+ # 7. Operational Context
33
+ deck_activity: str = Field("None", description="Activity on deck: 'Flight Ops', 'Cargo Handling', 'Personnel gathering', 'Empty'.")
34
+ special_features: List[str] = Field(default_factory=list, description="Anomalies: 'Rust streaks', 'Camouflage', 'Antenna forest', 'RHIBs on davits'.")
35
+
36
+ # 8. Threat Assessment
37
+ threat_level_score: int = Field(..., ge=1, le=10, description="1-10 Threat Score (1=Benign, 10=Imminent Attack).")
38
+ threat_classification: Literal["Friendly", "Neutral", "Suspect", "Hostile"] = Field(..., description="Tactical classification.")
39
+ tactical_intent: str = Field(..., description="Inferred intent: 'Transit', 'Intelligence Gathering', 'Harassment', 'Attack Profile'.")
40
+
41
+ class FrameThreatAnalysis(BaseModel):
42
+ objects: dict[str, NavalThreatAssessment] = Field(..., description="Map of Object ID (e.g., 'T01') to its assessment.")