Spaces:
Sleeping
Sleeping
Zhen Ye
committed on
Commit
·
55e372a
1
Parent(s):
58bb3a4
GPT reasoning: add in-memory frame encoding, image_b64 support, use gpt-4o-mini for relevance
Browse files
- utils/gpt_reasoning.py +31 -10
- utils/relevance.py +1 -1
utils/gpt_reasoning.py
CHANGED
|
@@ -15,6 +15,15 @@ def encode_image(image_path: str) -> str:
|
|
| 15 |
return base64.b64encode(image_file.read()).decode('utf-8')
|
| 16 |
|
| 17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
_DOMAIN_ROLES = {
|
| 19 |
"NAVAL": "Naval Intelligence Officer and Maritime Threat Analyst",
|
| 20 |
"GROUND": "Ground Surveillance Intelligence Officer",
|
|
@@ -110,21 +119,26 @@ def _build_domain_system_prompt(domain: str, mission_spec=None) -> str:
|
|
| 110 |
|
| 111 |
|
| 112 |
def estimate_threat_gpt(
|
| 113 |
-
image_path: str,
|
| 114 |
-
detections: List[Dict[str, Any]],
|
| 115 |
mission_spec=None, # Optional[MissionSpecification]
|
|
|
|
| 116 |
) -> Dict[str, Any]:
|
| 117 |
"""
|
| 118 |
Perform Threat Assessment on detected objects using GPT-4o.
|
| 119 |
|
| 120 |
Args:
|
| 121 |
-
image_path: Path to the image file.
|
| 122 |
detections: List of detection dicts (bbox, label, etc.).
|
| 123 |
mission_spec: Optional MissionSpecification for domain-aware assessment.
|
|
|
|
| 124 |
|
| 125 |
Returns:
|
| 126 |
Dict mapping object ID (e.g., T01) to threat assessment dict.
|
| 127 |
"""
|
|
|
|
|
|
|
|
|
|
| 128 |
api_key = os.environ.get("OPENAI_API_KEY")
|
| 129 |
if not api_key:
|
| 130 |
logger.warning("OPENAI_API_KEY not set. Skipping GPT threat assessment.")
|
|
@@ -143,11 +157,17 @@ def estimate_threat_gpt(
|
|
| 143 |
if not det_text:
|
| 144 |
return {}
|
| 145 |
|
| 146 |
-
# 2. Encode image
|
| 147 |
-
|
| 148 |
-
base64_image =
|
| 149 |
-
|
| 150 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
return {}
|
| 152 |
|
| 153 |
# 3. Domain-aware prompt selection (INV-7)
|
|
@@ -184,7 +204,8 @@ def estimate_threat_gpt(
|
|
| 184 |
{
|
| 185 |
"type": "image_url",
|
| 186 |
"image_url": {
|
| 187 |
-
"url": f"data:image/jpeg;base64,{base64_image}"
|
|
|
|
| 188 |
}
|
| 189 |
}
|
| 190 |
]
|
|
@@ -207,7 +228,7 @@ def estimate_threat_gpt(
|
|
| 207 |
headers=headers,
|
| 208 |
method="POST"
|
| 209 |
)
|
| 210 |
-
with urllib.request.urlopen(req) as response:
|
| 211 |
resp_data = json.loads(response.read().decode('utf-8'))
|
| 212 |
|
| 213 |
content = resp_data['choices'][0]['message'].get('content')
|
|
|
|
| 15 |
return base64.b64encode(image_file.read()).decode('utf-8')
|
| 16 |
|
| 17 |
|
| 18 |
+
def encode_frame_to_b64(frame) -> str:
|
| 19 |
+
"""Encode an OpenCV BGR frame to a base64 JPEG string in memory (no disk I/O)."""
|
| 20 |
+
import cv2
|
| 21 |
+
success, buf = cv2.imencode('.jpg', frame)
|
| 22 |
+
if not success:
|
| 23 |
+
raise ValueError("Failed to encode frame to JPEG")
|
| 24 |
+
return base64.b64encode(buf.tobytes()).decode('utf-8')
|
| 25 |
+
|
| 26 |
+
|
| 27 |
_DOMAIN_ROLES = {
|
| 28 |
"NAVAL": "Naval Intelligence Officer and Maritime Threat Analyst",
|
| 29 |
"GROUND": "Ground Surveillance Intelligence Officer",
|
|
|
|
| 119 |
|
| 120 |
|
| 121 |
def estimate_threat_gpt(
|
| 122 |
+
image_path: Optional[str] = None,
|
| 123 |
+
detections: Optional[List[Dict[str, Any]]] = None,
|
| 124 |
mission_spec=None, # Optional[MissionSpecification]
|
| 125 |
+
image_b64: Optional[str] = None,
|
| 126 |
) -> Dict[str, Any]:
|
| 127 |
"""
|
| 128 |
Perform Threat Assessment on detected objects using GPT-4o.
|
| 129 |
|
| 130 |
Args:
|
| 131 |
+
image_path: Path to the image file (mutually exclusive with image_b64).
|
| 132 |
detections: List of detection dicts (bbox, label, etc.).
|
| 133 |
mission_spec: Optional MissionSpecification for domain-aware assessment.
|
| 134 |
+
image_b64: Pre-encoded base64 JPEG string (avoids disk round-trip).
|
| 135 |
|
| 136 |
Returns:
|
| 137 |
Dict mapping object ID (e.g., T01) to threat assessment dict.
|
| 138 |
"""
|
| 139 |
+
if detections is None:
|
| 140 |
+
detections = []
|
| 141 |
+
|
| 142 |
api_key = os.environ.get("OPENAI_API_KEY")
|
| 143 |
if not api_key:
|
| 144 |
logger.warning("OPENAI_API_KEY not set. Skipping GPT threat assessment.")
|
|
|
|
| 157 |
if not det_text:
|
| 158 |
return {}
|
| 159 |
|
| 160 |
+
# 2. Encode image (prefer pre-encoded b64 to avoid disk I/O)
|
| 161 |
+
if image_b64:
|
| 162 |
+
base64_image = image_b64
|
| 163 |
+
elif image_path:
|
| 164 |
+
try:
|
| 165 |
+
base64_image = encode_image(image_path)
|
| 166 |
+
except Exception as e:
|
| 167 |
+
logger.error(f"Failed to encode image for GPT: {e}")
|
| 168 |
+
return {}
|
| 169 |
+
else:
|
| 170 |
+
logger.error("estimate_threat_gpt: no image_path or image_b64 provided")
|
| 171 |
return {}
|
| 172 |
|
| 173 |
# 3. Domain-aware prompt selection (INV-7)
|
|
|
|
| 204 |
{
|
| 205 |
"type": "image_url",
|
| 206 |
"image_url": {
|
| 207 |
+
"url": f"data:image/jpeg;base64,{base64_image}",
|
| 208 |
+
"detail": "low"
|
| 209 |
}
|
| 210 |
}
|
| 211 |
]
|
|
|
|
| 228 |
headers=headers,
|
| 229 |
method="POST"
|
| 230 |
)
|
| 231 |
+
with urllib.request.urlopen(req, timeout=30) as response:
|
| 232 |
resp_data = json.loads(response.read().decode('utf-8'))
|
| 233 |
|
| 234 |
content = resp_data['choices'][0]['message'].get('content')
|
utils/relevance.py
CHANGED
|
@@ -108,7 +108,7 @@ def evaluate_relevance_llm(
|
|
| 108 |
)
|
| 109 |
|
| 110 |
payload = {
|
| 111 |
-
"model": "gpt-4o",
|
| 112 |
"temperature": 0.0,
|
| 113 |
"max_tokens": 200,
|
| 114 |
"response_format": {"type": "json_object"},
|
|
|
|
| 108 |
)
|
| 109 |
|
| 110 |
payload = {
|
| 111 |
+
"model": "gpt-4o-mini",
|
| 112 |
"temperature": 0.0,
|
| 113 |
"max_tokens": 200,
|
| 114 |
"response_format": {"type": "json_object"},
|