Spaces:
Sleeping
Sleeping
File size: 6,883 Bytes
8094b21 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 | import os
import json
import base64
import logging
from typing import List, Dict, Any, Optional
import urllib.request
import urllib.error
from utils.schemas import FrameThreatAnalysis
# Module-level logger named after this module (__name__).
logger = logging.getLogger(__name__)
def encode_image(image_path: str) -> str:
    """Return the contents of the file at *image_path* as Base64 text.

    The file is opened in binary mode, read in full, Base64-encoded and the
    resulting bytes are decoded to ``str`` as UTF-8. Errors raised by
    ``open`` (for example a missing file) propagate to the caller.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode('utf-8')
# Conversion factor used to derive the legacy metre-based distance fields.
_METERS_PER_NAUTICAL_MILE = 1852.0

# Image MIME types that are passed through to the data URL; anything else
# falls back to the previously hard-coded "image/jpeg".
_SUPPORTED_IMAGE_MIME_TYPES = frozenset(
    {"image/jpeg", "image/png", "image/gif", "image/webp"}
)

# System prompt describing the JSON schema the model must answer with.
_NAVAL_SYSTEM_PROMPT = (
    "You are an elite Naval Intelligence Officer and Threat Analyst. "
    "Your task is to analyze optical surveillance imagery and provide a detailed tactical assessment for every detected object. "
    "You must output a STRICT JSON object that matches the following schema for every object ID provided:\n\n"
    "RESPONSE SCHEMA (JSON):\n"
    "{\n"
    " \"objects\": {\n"
    " \"T01\": {\n"
    " \"vessel_category\": \"Warship\" | \"Commercial\" | \"Fishing\" | \"Small Boat\" | \"Aircraft\" | \"Unknown\",\n"
    " \"specific_class\": \"string (e.g., Arleigh Burke, Skiff)\",\n"
    " \"identity_markers\": [\"string (hull numbers, flags)\"],\n"
    " \"flag_state\": \"string (Country)\",\n"
    " \"visible_weapons\": [\"string\"],\n"
    " \"weapon_readiness\": \"Stowed/PEACE\" | \"Trained/Aiming\" | \"Firing/HOSTILE\",\n"
    " \"sensor_profile\": [\"string (radars)\"],\n"
    " \"motion_status\": \"Dead in Water\" | \"Underway Slow\" | \"Underway Fast\" | \"Flank Speed\",\n"
    " \"wake_description\": \"string\",\n"
    " \"aspect\": \"Bow-on\" | \"Stern-on\" | \"Broadside\",\n"
    " \"range_estimation_nm\": float (Nautical Miles),\n"
    " \"bearing_clock\": \"string (e.g. 12 o'clock)\",\n"
    " \"deck_activity\": \"string\",\n"
    " \"special_features\": [\"string (anomalies)\"],\n"
    " \"threat_level_score\": int (1-10),\n"
    " \"threat_classification\": \"Friendly\" | \"Neutral\" | \"Suspect\" | \"Hostile\",\n"
    " \"tactical_intent\": \"string (e.g., Transit, Attack)\"\n"
    " }\n"
    " }\n"
    "}\n\n"
    "ASSUMPTIONS:\n"
    "- Unknown small boats approaching larger vessels are HIGH threat (Suspect/Hostile).\n"
    "- Visible trained weapons are IMMINENT threat (Score 9-10).\n"
    "- Ignore artifacts, focus on the objects."
)


def _summarize_detections(detections: List[Dict[str, Any]]) -> str:
    """Render one prompt line per detection, with IDs T01, T02, ... by position."""
    return "\n".join(
        f"- ID: T{position:02d}, "
        f"Classification Hint: {det.get('label', 'object')}, "
        f"BBox: {det.get('bbox', [])}"
        for position, det in enumerate(detections, start=1)
    )


def _image_data_url(image_path: str, base64_image: str) -> str:
    """Build a ``data:`` URL for the image, guessing the MIME type from the path."""
    import mimetypes  # local import: only needed here

    mime_type, _ = mimetypes.guess_type(image_path)
    if mime_type not in _SUPPORTED_IMAGE_MIME_TYPES:
        mime_type = "image/jpeg"
    return f"data:{mime_type};base64,{base64_image}"


def _text_or(value: Any, default: str) -> str:
    """Return *value* as ``str``, or *default* when it is ``None``."""
    return default if value is None else str(value)


def _add_legacy_fields(obj_id: str, data: Dict[str, Any]) -> None:
    """Add the legacy distance/direction/description keys to *data* in place."""
    # 1. Distance: nautical miles -> metres. A non-numeric value only skips
    #    these two keys instead of aborting the whole assessment.
    if "range_estimation_nm" in data:
        try:
            distance_m = float(data["range_estimation_nm"]) * _METERS_PER_NAUTICAL_MILE
        except (TypeError, ValueError):
            logger.warning(
                "Ignoring non-numeric range_estimation_nm for %s: %r",
                obj_id,
                data["range_estimation_nm"],
            )
        else:
            data["distance_m"] = distance_m
            data["gpt_distance_m"] = distance_m  # Explicit legacy key

    # 2. Direction
    if "bearing_clock" in data:
        data["direction"] = data["bearing_clock"]
        data["gpt_direction"] = data["bearing_clock"]

    # 3. Description, e.g. "Warship (Arleigh Burke) [HOSTILE Lvl:9]".
    #    Null fields from the model are treated like missing ones.
    category = _text_or(data.get("vessel_category"), "Unknown")
    spec = data.get("specific_class", "")
    threat = _text_or(data.get("threat_classification"), "Unknown")
    score = data.get("threat_level_score", 0)

    desc_parts = [category]
    if spec:
        desc_parts.append(f"({spec})")
    desc_parts.append(f"[{threat.upper()} Lvl:{score}]")
    data["description"] = " ".join(desc_parts)
    data["gpt_description"] = data["description"]


def estimate_threat_gpt(
    image_path: str,
    detections: List[Dict[str, Any]],
    *,
    timeout: float = 60.0,
) -> Dict[str, Any]:
    """
    Perform Naval Threat Assessment on detected objects using GPT-4o.

    The function is best-effort: every failure (missing API key, no
    detections, unreadable image, network/HTTP error, malformed model
    output) is logged and results in an empty dict rather than an exception.

    Args:
        image_path: Path to the image file.
        detections: List of detection dicts; the optional "label" and "bbox"
            keys are included in the prompt. IDs are assigned by position
            (T01, T02, ...).
        timeout: Seconds to wait on the HTTP request before giving up.

    Returns:
        Dict mapping object ID (e.g., T01) to the assessment dict returned by
        the model, augmented with the legacy keys "distance_m",
        "gpt_distance_m", "direction", "gpt_direction", "description" and
        "gpt_description". Entries that are not dicts are dropped.
    """
    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        logger.warning("OPENAI_API_KEY not set. Skipping GPT threat assessment.")
        return {}

    # 1. Prepare detections summary for prompt
    det_text = _summarize_detections(detections)
    if not det_text:
        return {}

    # 2. Encode image (any failure here just disables the assessment)
    try:
        base64_image = encode_image(image_path)
    except Exception as e:
        logger.error("Failed to encode image for GPT: %s", e)
        return {}

    # 3. Construct Prompt (Naval Focused)
    user_prompt = (
        f"Analyze this naval surveillance image. The following objects have been detected:\n"
        f"{det_text}\n\n"
        "Provide a detailed Naval Threat Assessment for each object based on its visual signatures."
    )

    # 4. Call API
    payload = {
        "model": "gpt-4o",  # Use 4o for better vision analysis
        "messages": [
            {"role": "system", "content": _NAVAL_SYSTEM_PROMPT},
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": user_prompt},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": _image_data_url(image_path, base64_image)
                        },
                    },
                ],
            },
        ],
        "max_tokens": 1500,
        "temperature": 0.2,  # Low temp for factual consistency
        "response_format": {"type": "json_object"},
    }
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }

    try:
        req = urllib.request.Request(
            "https://api.openai.com/v1/chat/completions",
            data=json.dumps(payload).encode("utf-8"),
            headers=headers,
            method="POST",
        )
        # Without a timeout a stalled connection would block the caller forever.
        with urllib.request.urlopen(req, timeout=timeout) as response:
            resp_data = json.loads(response.read().decode("utf-8"))

        choice = resp_data["choices"][0]
        if choice.get("finish_reason") == "length":
            logger.warning(
                "GPT response was truncated at max_tokens; JSON may be incomplete."
            )

        content = choice["message"].get("content")
        if not content:
            logger.warning("GPT returned empty content. Full response: %s", resp_data)
            return {}

        result_json = json.loads(content)
        objects = result_json.get("objects", {}) if isinstance(result_json, dict) else None
        if not isinstance(objects, dict):
            logger.warning("GPT response has no usable 'objects' mapping: %r", result_json)
            return {}

        # Polyfill legacy fields for frontend compatibility
        assessments: Dict[str, Any] = {}
        for obj_id, data in objects.items():
            if not isinstance(data, dict):
                logger.warning("Dropping non-dict assessment for %s: %r", obj_id, data)
                continue
            _add_legacy_fields(obj_id, data)
            assessments[obj_id] = data
        return assessments

    except urllib.error.HTTPError as e:
        # HTTP error responses usually carry a body describing the failure.
        try:
            detail = e.read().decode("utf-8", errors="replace")[:500]
        except Exception:
            detail = ""
        logger.error("GPT API call failed: %s %s", e, detail)
        return {}
    except Exception as e:
        # Top-level boundary: this function must never raise into the caller.
        logger.error("GPT API call failed: %s", e)
        return {}
|