Zhen Ye committed on
Commit
55e372a
·
1 Parent(s): 58bb3a4

GPT reasoning: add in-memory frame encoding, image_b64 support, use gpt-4o-mini for relevance

Browse files
Files changed (2) hide show
  1. utils/gpt_reasoning.py +31 -10
  2. utils/relevance.py +1 -1
utils/gpt_reasoning.py CHANGED
@@ -15,6 +15,15 @@ def encode_image(image_path: str) -> str:
15
  return base64.b64encode(image_file.read()).decode('utf-8')
16
 
17
 
 
 
 
 
 
 
 
 
 
18
  _DOMAIN_ROLES = {
19
  "NAVAL": "Naval Intelligence Officer and Maritime Threat Analyst",
20
  "GROUND": "Ground Surveillance Intelligence Officer",
@@ -110,21 +119,26 @@ def _build_domain_system_prompt(domain: str, mission_spec=None) -> str:
110
 
111
 
112
  def estimate_threat_gpt(
113
- image_path: str,
114
- detections: List[Dict[str, Any]],
115
  mission_spec=None, # Optional[MissionSpecification]
 
116
  ) -> Dict[str, Any]:
117
  """
118
  Perform Threat Assessment on detected objects using GPT-4o.
119
 
120
  Args:
121
- image_path: Path to the image file.
122
  detections: List of detection dicts (bbox, label, etc.).
123
  mission_spec: Optional MissionSpecification for domain-aware assessment.
 
124
 
125
  Returns:
126
  Dict mapping object ID (e.g., T01) to threat assessment dict.
127
  """
 
 
 
128
  api_key = os.environ.get("OPENAI_API_KEY")
129
  if not api_key:
130
  logger.warning("OPENAI_API_KEY not set. Skipping GPT threat assessment.")
@@ -143,11 +157,17 @@ def estimate_threat_gpt(
143
  if not det_text:
144
  return {}
145
 
146
- # 2. Encode image
147
- try:
148
- base64_image = encode_image(image_path)
149
- except Exception as e:
150
- logger.error(f"Failed to encode image for GPT: {e}")
 
 
 
 
 
 
151
  return {}
152
 
153
  # 3. Domain-aware prompt selection (INV-7)
@@ -184,7 +204,8 @@ def estimate_threat_gpt(
184
  {
185
  "type": "image_url",
186
  "image_url": {
187
- "url": f"data:image/jpeg;base64,{base64_image}"
 
188
  }
189
  }
190
  ]
@@ -207,7 +228,7 @@ def estimate_threat_gpt(
207
  headers=headers,
208
  method="POST"
209
  )
210
- with urllib.request.urlopen(req) as response:
211
  resp_data = json.loads(response.read().decode('utf-8'))
212
 
213
  content = resp_data['choices'][0]['message'].get('content')
 
15
  return base64.b64encode(image_file.read()).decode('utf-8')
16
 
17
 
18
def encode_frame_to_b64(frame) -> str:
    """Encode an OpenCV BGR frame to a base64 JPEG string in memory (no disk I/O).

    Args:
        frame: Image array to hand to ``cv2.imencode`` (an OpenCV BGR frame).

    Returns:
        The JPEG-encoded bytes of ``frame`` as a base64 string.

    Raises:
        ValueError: If ``frame`` is ``None`` or has zero elements, or if
            OpenCV reports that JPEG encoding did not succeed.
    """
    # Reject missing/empty frames up front so callers get the same ValueError
    # used for encoding failures instead of an opaque OpenCV assertion error.
    if frame is None or getattr(frame, "size", None) == 0:
        raise ValueError("Cannot encode an empty frame to JPEG")

    # Kept function-local, as in the original: cv2 is only needed when this
    # function is actually called.
    import cv2

    success, buf = cv2.imencode('.jpg', frame)
    if not success:
        raise ValueError("Failed to encode frame to JPEG")
    return base64.b64encode(buf.tobytes()).decode('utf-8')
25
+
26
+
27
  _DOMAIN_ROLES = {
28
  "NAVAL": "Naval Intelligence Officer and Maritime Threat Analyst",
29
  "GROUND": "Ground Surveillance Intelligence Officer",
 
119
 
120
 
121
  def estimate_threat_gpt(
122
+ image_path: Optional[str] = None,
123
+ detections: Optional[List[Dict[str, Any]]] = None,
124
  mission_spec=None, # Optional[MissionSpecification]
125
+ image_b64: Optional[str] = None,
126
  ) -> Dict[str, Any]:
127
  """
128
  Perform Threat Assessment on detected objects using GPT-4o.
129
 
130
  Args:
131
+ image_path: Path to the image file (mutually exclusive with image_b64).
132
  detections: List of detection dicts (bbox, label, etc.).
133
  mission_spec: Optional MissionSpecification for domain-aware assessment.
134
+ image_b64: Pre-encoded base64 JPEG string (avoids disk round-trip).
135
 
136
  Returns:
137
  Dict mapping object ID (e.g., T01) to threat assessment dict.
138
  """
139
+ if detections is None:
140
+ detections = []
141
+
142
  api_key = os.environ.get("OPENAI_API_KEY")
143
  if not api_key:
144
  logger.warning("OPENAI_API_KEY not set. Skipping GPT threat assessment.")
 
157
  if not det_text:
158
  return {}
159
 
160
+ # 2. Encode image (prefer pre-encoded b64 to avoid disk I/O)
161
+ if image_b64:
162
+ base64_image = image_b64
163
+ elif image_path:
164
+ try:
165
+ base64_image = encode_image(image_path)
166
+ except Exception as e:
167
+ logger.error(f"Failed to encode image for GPT: {e}")
168
+ return {}
169
+ else:
170
+ logger.error("estimate_threat_gpt: no image_path or image_b64 provided")
171
  return {}
172
 
173
  # 3. Domain-aware prompt selection (INV-7)
 
204
  {
205
  "type": "image_url",
206
  "image_url": {
207
+ "url": f"data:image/jpeg;base64,{base64_image}",
208
+ "detail": "low"
209
  }
210
  }
211
  ]
 
228
  headers=headers,
229
  method="POST"
230
  )
231
+ with urllib.request.urlopen(req, timeout=30) as response:
232
  resp_data = json.loads(response.read().decode('utf-8'))
233
 
234
  content = resp_data['choices'][0]['message'].get('content')
utils/relevance.py CHANGED
@@ -108,7 +108,7 @@ def evaluate_relevance_llm(
108
  )
109
 
110
  payload = {
111
- "model": "gpt-4o",
112
  "temperature": 0.0,
113
  "max_tokens": 200,
114
  "response_format": {"type": "json_object"},
 
108
  )
109
 
110
  payload = {
111
+ "model": "gpt-4o-mini",
112
  "temperature": 0.0,
113
  "max_tokens": 200,
114
  "response_format": {"type": "json_object"},