perception2 / utils /gpt_reasoning.py
Zhen Ye
feat(threat-assessment): implement naval threat analysis with GPT-4o\n\n- Rename utils/gpt_distance.py to utils/gpt_reasoning.py and update logic for 15 naval threat features\n- Add Pydantic schemas for NavalThreatAssessment in utils/schemas.py\n- Update backend (app.py, inference.py) to use new threat estimation and pass full metadata\n- refactor(frontend): render threat level badges and detailed feature table in UI
8094b21
import os
import json
import base64
import logging
from typing import List, Dict, Any, Optional
import urllib.request
import urllib.error
from utils.schemas import FrameThreatAnalysis
# Module-level logger named after this module; used by the functions below
# to report skipped or failed GPT calls.
logger = logging.getLogger(__name__)
def encode_image(image_path: str) -> str:
    """Read a file and return its bytes as base64 text.

    Args:
        image_path: Path of the file to read (opened in binary mode).

    Returns:
        The base64 encoding of the file's raw bytes, decoded to ``str``.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')
# One nautical mile expressed in metres; used to derive the legacy
# ``distance_m`` field from ``range_estimation_nm``.
_METERS_PER_NAUTICAL_MILE = 1852.0

# System prompt sent with every request. It pins the JSON schema the model
# must return; the text is runtime data and is kept exactly as authored.
_NAVAL_SYSTEM_PROMPT = (
    "You are an elite Naval Intelligence Officer and Threat Analyst. "
    "Your task is to analyze optical surveillance imagery and provide a detailed tactical assessment for every detected object. "
    "You must output a STRICT JSON object that matches the following schema for every object ID provided:\n\n"
    "RESPONSE SCHEMA (JSON):\n"
    "{\n"
    " \"objects\": {\n"
    " \"T01\": {\n"
    " \"vessel_category\": \"Warship\" | \"Commercial\" | \"Fishing\" | \"Small Boat\" | \"Aircraft\" | \"Unknown\",\n"
    " \"specific_class\": \"string (e.g., Arleigh Burke, Skiff)\",\n"
    " \"identity_markers\": [\"string (hull numbers, flags)\"],\n"
    " \"flag_state\": \"string (Country)\",\n"
    " \"visible_weapons\": [\"string\"],\n"
    " \"weapon_readiness\": \"Stowed/PEACE\" | \"Trained/Aiming\" | \"Firing/HOSTILE\",\n"
    " \"sensor_profile\": [\"string (radars)\"],\n"
    " \"motion_status\": \"Dead in Water\" | \"Underway Slow\" | \"Underway Fast\" | \"Flank Speed\",\n"
    " \"wake_description\": \"string\",\n"
    " \"aspect\": \"Bow-on\" | \"Stern-on\" | \"Broadside\",\n"
    " \"range_estimation_nm\": float (Nautical Miles),\n"
    " \"bearing_clock\": \"string (e.g. 12 o'clock)\",\n"
    " \"deck_activity\": \"string\",\n"
    " \"special_features\": [\"string (anomalies)\"],\n"
    " \"threat_level_score\": int (1-10),\n"
    " \"threat_classification\": \"Friendly\" | \"Neutral\" | \"Suspect\" | \"Hostile\",\n"
    " \"tactical_intent\": \"string (e.g., Transit, Attack)\"\n"
    " }\n"
    " }\n"
    "}\n\n"
    "ASSUMPTIONS:\n"
    "- Unknown small boats approaching larger vessels are HIGH threat (Suspect/Hostile).\n"
    "- Visible trained weapons are IMMINENT threat (Score 9-10).\n"
    "- Ignore artifacts, focus on the objects."
)


def _summarize_detections(detections: List[Dict[str, Any]]) -> str:
    """Render one prompt line per detection, labelled T01, T02, ... in list order."""
    return "\n".join(
        f"- ID: T{idx:02d}, Classification Hint: {det.get('label', 'object')}, "
        f"BBox: {det.get('bbox', [])}"
        for idx, det in enumerate(detections, start=1)
    )


def _to_float(value: Any) -> Optional[float]:
    """Return ``value`` as a float, or None when it is not a usable number."""
    # bool is an int subclass; a JSON true/false is never a valid range.
    if isinstance(value, bool):
        return None
    if isinstance(value, (int, float)):
        return float(value)
    if isinstance(value, str):
        try:
            return float(value.strip())
        except ValueError:
            return None
    return None


def _add_legacy_fields(data: Dict[str, Any]) -> None:
    """Add the legacy distance/direction/description keys to one assessment in place."""
    # 1. Distance: NM -> metres. Skipped when the model returned something
    #    that cannot be read as a number, rather than failing the whole frame.
    range_nm = _to_float(data.get("range_estimation_nm"))
    if range_nm is not None:
        data["distance_m"] = range_nm * _METERS_PER_NAUTICAL_MILE
        data["gpt_distance_m"] = data["distance_m"]  # Explicit legacy key

    # 2. Direction
    if "bearing_clock" in data:
        data["direction"] = data["bearing_clock"]
        data["gpt_direction"] = data["bearing_clock"]

    # 3. Description summarising the richer fields,
    #    e.g. "Warship (Arleigh Burke) [HOSTILE Lvl:9]".
    #    ``or`` also covers JSON null, which ``dict.get`` defaults do not.
    category = data.get("vessel_category") or "Unknown"
    spec = data.get("specific_class") or ""
    threat = data.get("threat_classification") or "Unknown"
    score = data.get("threat_level_score")
    if score is None:
        score = 0

    desc_parts = [str(category)]
    if spec:
        desc_parts.append(f"({spec})")
    desc_parts.append(f"[{str(threat).upper()} Lvl:{score}]")
    data["description"] = " ".join(desc_parts)
    data["gpt_description"] = data["description"]


def estimate_threat_gpt(
    image_path: str,
    detections: List[Dict[str, Any]],
    *,
    timeout: float = 60.0,
) -> Dict[str, Any]:
    """
    Perform Naval Threat Assessment on detected objects using GPT-4o.

    The image is sent base64-encoded together with a text summary of the
    detections. Each assessment returned by the model is augmented with the
    legacy keys ``distance_m``/``gpt_distance_m``, ``direction``/
    ``gpt_direction`` and ``description``/``gpt_description``.

    This function is best-effort: every failure is logged and reported as an
    empty dict rather than raised.

    Args:
        image_path: Path to the image file.
        detections: List of detection dicts; ``label`` and ``bbox`` are read
            from each. ``None`` is treated like an empty list.
        timeout: Seconds to wait on the HTTP request before giving up.

    Returns:
        Dict mapping object ID (e.g., T01) to NavalThreatAssessment dict.
        Empty when OPENAI_API_KEY is unset, there are no detections, the
        image cannot be read, or the API call / response parsing fails.
    """
    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        logger.warning("OPENAI_API_KEY not set. Skipping GPT threat assessment.")
        return {}

    # 1. Prepare detections summary for prompt
    det_text = _summarize_detections(detections or [])
    if not det_text:
        return {}

    # 2. Encode image
    try:
        base64_image = encode_image(image_path)
    except Exception as e:
        logger.error("Failed to encode image for GPT: %s", e)
        return {}

    # Label the data URL with the file's real image type when the extension
    # reveals it; fall back to JPEG otherwise.
    import mimetypes
    mime_type, _ = mimetypes.guess_type(str(image_path))
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    # 3. Construct Prompt (Naval Focused)
    user_prompt = (
        f"Analyze this naval surveillance image. The following objects have been detected:\n"
        f"{det_text}\n\n"
        "Provide a detailed Naval Threat Assessment for each object based on its visual signatures."
    )

    # 4. Call API
    payload = {
        "model": "gpt-4o",  # Use 4o for better vision analysis
        "messages": [
            {"role": "system", "content": _NAVAL_SYSTEM_PROMPT},
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": user_prompt},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{base64_image}"
                        },
                    },
                ],
            },
        ],
        "max_tokens": 1500,
        "temperature": 0.2,  # Low temp for factual consistency
        "response_format": {"type": "json_object"},
    }
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }

    try:
        req = urllib.request.Request(
            "https://api.openai.com/v1/chat/completions",
            data=json.dumps(payload).encode('utf-8'),
            headers=headers,
            method="POST",
        )
        # Bounded wait: without a timeout a stalled connection would block
        # the caller indefinitely.
        with urllib.request.urlopen(req, timeout=timeout) as response:
            resp_data = json.loads(response.read().decode('utf-8'))

        choice = resp_data['choices'][0]
        content = choice['message'].get('content')
        if not content:
            logger.warning("GPT returned empty content. Full response: %s", resp_data)
            return {}
        if choice.get('finish_reason') == 'length':
            # Truncated JSON will most likely fail to parse below; say why.
            logger.warning("GPT response hit max_tokens and may be truncated.")

        result_json = json.loads(content)
        objects = result_json.get("objects", {}) if isinstance(result_json, dict) else None
        if not isinstance(objects, dict):
            logger.warning("GPT response did not contain an 'objects' mapping.")
            return {}

        # Polyfill legacy fields for frontend compatibility. Malformed
        # entries are dropped individually so one bad object does not
        # discard the assessments of all the others.
        assessments: Dict[str, Any] = {}
        for obj_id, data in objects.items():
            if not isinstance(data, dict):
                logger.warning("Skipping malformed GPT assessment for %s", obj_id)
                continue
            _add_legacy_fields(data)
            assessments[obj_id] = data
        return assessments
    except Exception as e:
        logger.error("GPT API call failed: %s", e)
        return {}