Arko007 committed
Commit 24b169f · verified · 1 Parent(s): 4f41596

Update processing.py

Files changed (1):
  1. processing.py +293 -116

processing.py CHANGED
@@ -1,21 +1,26 @@
  """
- Image processing pipeline for SUB-SENTINEL.
-
- Provides three functions:
-     enhance_image(raw_bytes) -> (base64_str, numpy_array)
-     run_detection(image_array) -> list[dict]
-     build_heatmap(image_array) -> base64_str
-
- All heavy-weight model paths gracefully fall back to CPU-friendly alternatives
- when model weights are absent. Use the environment variable DETECTION_MODEL
- to override the default detection model (e.g. "yolov8m.pt" or a local path).
  """
-
  import os
  import io
  import base64
  import logging
- from typing import Optional, List, Dict

  import cv2
  import numpy as np
@@ -25,16 +30,13 @@ from skimage.metrics import structural_similarity as ssim
  logger = logging.getLogger(__name__)
  logger.addHandler(logging.NullHandler())

- # ---------------------------------------------------------------------------
- # Default detection model (change via env var DETECTION_MODEL if needed)
- # ---------------------------------------------------------------------------
- # NOTE: default changed to yolov8m for improved accuracy.
  DEFAULT_DETECTION_MODEL = os.getenv("DETECTION_MODEL", "yolov8m.pt")

- # ---------------------------------------------------------------------------
- # Maritime label mapping for YOLOv8 COCO classes
- # ---------------------------------------------------------------------------
- _LABEL_MAP: dict[str, str] = {
      "person": "Diver/Swimmer",
      "boat": "Surface/Sub Threat",
      "ship": "Surface/Sub Threat",
@@ -43,21 +45,18 @@ _LABEL_MAP: dict[str, str] = {
      # extend as needed
  }

-
  # --------------------------- utilities -------------------------------------
- def _array_to_base64(img_array: np.ndarray, fmt: str = "JPEG") -> str:
-     """Convert a uint8 numpy array (H×W×C, RGB) to a base-64 data-URI string."""
      pil_img = Image.fromarray(img_array.astype(np.uint8))
      buf = io.BytesIO()
      fmt_upper = fmt.upper()
      pil_img.save(buf, format=fmt_upper, quality=90)
      encoded = base64.b64encode(buf.getvalue()).decode("utf-8")
-     mime = "image/jpeg" if fmt_upper == "JPEG" else "image/png"
      return f"data:{mime};base64,{encoded}"


  def _bytes_to_array(raw_bytes: bytes) -> np.ndarray:
-     """Decode raw image bytes to a uint8 RGB numpy array."""
      nparr = np.frombuffer(raw_bytes, np.uint8)
      bgr = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
      if bgr is None:
@@ -65,158 +64,336 @@ def _bytes_to_array(raw_bytes: bytes) -> np.ndarray:
      return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)


- # ---------------------------------------------------------------------------
- # 1. Underwater image enhancement
- # ---------------------------------------------------------------------------
  def _clahe_enhance(rgb: np.ndarray) -> np.ndarray:
-     """
-     CPU-friendly underwater enhancement using CLAHE on LAB colour space.
-     Used when FUnIE-GAN weights are unavailable.
-     """
      lab = cv2.cvtColor(rgb, cv2.COLOR_RGB2LAB)
-     l_channel, a_channel, b_channel = cv2.split(lab)
      clahe = cv2.createCLAHE(clipLimit=2.5, tileGridSize=(8, 8))
-     l_channel = clahe.apply(l_channel)
-     # Slight blue-green colour correction typical for underwater footage
-     a_channel = np.clip(a_channel.astype(np.int16) - 5, 0, 255).astype(np.uint8)
-     b_channel = np.clip(b_channel.astype(np.int16) + 10, 0, 255).astype(np.uint8)
-     enhanced_lab = cv2.merge([l_channel, a_channel, b_channel])
-     return cv2.cvtColor(enhanced_lab, cv2.COLOR_LAB2RGB)


  def _funiegan_enhance(rgb: np.ndarray) -> Optional[np.ndarray]:
-     """
-     Attempt FUnIE-GAN inference via a local ONNX weight file.
-     Returns None if weights are missing so the caller can fall back.
-     """
      weights_path = "weights/funiegan.onnx"
      try:
-         if not os.path.exists(weights_path):
-             return None
          net = cv2.dnn.readNetFromONNX(weights_path)
          h, w = rgb.shape[:2]
-         target_h, target_w = 256, 256
-         resized = cv2.resize(rgb, (target_w, target_h)).astype(np.float32) / 127.5 - 1.0
          blob = cv2.dnn.blobFromImage(resized)
          net.setInput(blob)
          out = net.forward()
-         # out shape may be (1, C, H, W)
          out_img = ((out[0].transpose(1, 2, 0) + 1.0) * 127.5).clip(0, 255).astype(np.uint8)
          return cv2.resize(out_img, (w, h))
      except Exception as exc:
-         logger.warning("FUnIE-GAN inference failed (%s); falling back to CLAHE.", exc)
          return None


- def enhance_image(raw_bytes: bytes) -> tuple[str, np.ndarray]:
-     """
-     Enhance an underwater image.
-
-     Returns:
-         (base64_enhanced, original_rgb_array)
-     The original array is returned unchanged for use in downstream steps.
-     """
      rgb = _bytes_to_array(raw_bytes)
-     enhanced = _funiegan_enhance(rgb)
      if enhanced is None:
          enhanced = _clahe_enhance(rgb)
-     return _array_to_base64(enhanced), rgb


- # ---------------------------------------------------------------------------
- # 2. Object detection (YOLOv8 family; default is yolov8m.pt)
- # ---------------------------------------------------------------------------
- def run_detection(rgb: np.ndarray, conf_thresh: float = 0.30) -> List[dict]:
-     """
-     Run YOLO detection (model chosen by DETECTION_MODEL env var or default)
-     and map labels to maritime terminology.
-
-     Returns a list of detection dicts:
-         {class, mapped_label, confidence, bbox: [x1, y1, x2, y2]}
      """
      try:
-         # Lazy import to avoid heavy dependency cost at module import time
-         from ultralytics import YOLO  # type: ignore
      except Exception as exc:
-         logger.warning("ultralytics package not available (%s); detection disabled.", exc)
-         return []

      model_path = os.getenv("DETECTION_MODEL", DEFAULT_DETECTION_MODEL)
      try:
-         model = YOLO(model_path)
      except Exception as exc:
-         logger.warning("Failed to load detection model '%s' (%s). Returning empty.", model_path, exc)
          return []

      try:
-         # Model accepts numpy image (RGB) directly
          results = model(rgb, verbose=False)
      except Exception as exc:
-         logger.warning("Model inference failed (%s). Returning empty.", exc)
          return []

-     detections: List[dict] = []
      for result in results:
          boxes = getattr(result, "boxes", None)
          if boxes is None:
              continue
          for box in boxes:
              try:
-                 # Defensive extraction: the ultralytics API returns tensors/arrays
                  conf = float(box.conf[0]) if hasattr(box.conf, "__len__") else float(box.conf)
                  if conf < conf_thresh:
                      continue
-
                  cls_id = int(box.cls[0]) if hasattr(box.cls, "__len__") else int(box.cls)
                  cls_name = model.names.get(cls_id, str(cls_id)) if hasattr(model, "names") else str(cls_id)
-
                  xyxy = box.xyxy[0] if hasattr(box.xyxy, "__len__") and len(box.xyxy) > 0 else None
                  if xyxy is None:
                      continue
-                 x1, y1, x2, y2 = (float(v) for v in xyxy)
                  detections.append({
                      "class": cls_name,
                      "mapped_label": _LABEL_MAP.get(cls_name, cls_name),
                      "confidence": round(conf, 4),
-                     "bbox": [round(x1), round(y1), round(x2), round(y2)],
                  })
              except Exception as exc:
-                 logger.debug("Skipping box due to extraction error: %s", exc)
                  continue

-     return detections
-
-
- # ---------------------------------------------------------------------------
- # 3. SSIM-based forensic heatmap
- # ---------------------------------------------------------------------------
- def build_heatmap(rgb: np.ndarray) -> str:
-     """
-     Generate a forensic heatmap by comparing the original image against a
-     Gaussian-blurred reference. High SSIM -> green; low SSIM -> red.
-     Returns a base64-encoded PNG heatmap (data URI).
-     """
-     gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)
-     blurred = cv2.GaussianBlur(gray, (15, 15), 0)
-
-     # Compute SSIM score map; fallback to simple difference if it fails
-     try:
-         _, ssim_map = ssim(gray, blurred, full=True, data_range=255)
-     except Exception as exc:
-         logger.warning("SSIM computation failed (%s); falling back to absdiff.", exc)
-         diff = cv2.absdiff(gray, blurred).astype(np.float32)
-         ssim_map = 1.0 - (diff / 255.0)
-
-     # Normalise to [0, 255]
-     ssim_norm = ((ssim_map + 1.0) / 2.0 * 255.0).clip(0, 255).astype(np.uint8)

-     # Map to BGR: low similarity -> red, high -> green
-     colormap = cv2.COLORMAP_RdYlGn if hasattr(cv2, "COLORMAP_RdYlGn") else cv2.COLORMAP_JET
-     heatmap_bgr = cv2.applyColorMap(ssim_norm, colormap)

-     # Blend with original for context
-     rgb_bgr = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
-     overlay = cv2.addWeighted(rgb_bgr, 0.55, heatmap_bgr, 0.45, 0)
-     overlay_rgb = cv2.cvtColor(overlay, cv2.COLOR_BGR2RGB)

-     return _array_to_base64(overlay_rgb, fmt="PNG")

+ # processing.py
  """
+ SUB-SENTINEL processing pipeline (Groq-first, Ultralytics fallback).
+
+ Exports:
+     enhance_image(raw_bytes) -> (base64_str, np.ndarray)
+     run_detection(rgb, sonar_data=None, conf_thresh=0.40) -> list[dict]
+     build_heatmap(rgb) -> base64_str
+     fuse_sonar_overlay(rgb, sonar_data) -> base64_str
+     generate_vector_sketch(detections) -> str (base64 zlib JSON)
+
+ Environment:
+     DETECTION_BACKEND = "groq" | "ultralytics" | "auto" (default "auto")
+     DETECTION_MODEL   = path to model / compiled groq artifact or ultralytics model id (default "yolov8m.pt")
+     GROQ_API_KEY      = optional API key for Groq LLM (if you want LLM postprocessing)
  """
  import os
  import io
+ import json
+ import zlib
  import base64
  import logging
+ from typing import Optional, List, Dict, Any

  import cv2
  import numpy as np

  logger = logging.getLogger(__name__)
  logger.addHandler(logging.NullHandler())

+ # Config
  DEFAULT_DETECTION_MODEL = os.getenv("DETECTION_MODEL", "yolov8m.pt")
+ DETECTION_BACKEND = os.getenv("DETECTION_BACKEND", "auto").lower()  # "groq", "ultralytics", "auto"
+ GROQ_API_KEY = os.getenv("GROQ_API_KEY") or os.getenv("groq")  # read common variants

+ # Maritime label mapping (COCO -> maritime)
+ _LABEL_MAP: Dict[str, str] = {
      "person": "Diver/Swimmer",
      "boat": "Surface/Sub Threat",
      "ship": "Surface/Sub Threat",
      # extend as needed
  }

  # --------------------------- utilities -------------------------------------
+ def _array_to_base64(img_array: np.ndarray, fmt: str = "PNG") -> str:
      pil_img = Image.fromarray(img_array.astype(np.uint8))
      buf = io.BytesIO()
      fmt_upper = fmt.upper()
      pil_img.save(buf, format=fmt_upper, quality=90)
      encoded = base64.b64encode(buf.getvalue()).decode("utf-8")
+     mime = "image/png" if fmt_upper == "PNG" else "image/jpeg"
      return f"data:{mime};base64,{encoded}"


  def _bytes_to_array(raw_bytes: bytes) -> np.ndarray:
      nparr = np.frombuffer(raw_bytes, np.uint8)
      bgr = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
      if bgr is None:
      return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)


+ def _ensure_int_box(box: List[float]) -> List[int]:
+     return [int(round(v)) for v in box]
+
+
+ # ------------------------ enhancement engines -------------------------------
  def _clahe_enhance(rgb: np.ndarray) -> np.ndarray:
      lab = cv2.cvtColor(rgb, cv2.COLOR_RGB2LAB)
+     l, a, b = cv2.split(lab)
      clahe = cv2.createCLAHE(clipLimit=2.5, tileGridSize=(8, 8))
+     l = clahe.apply(l)
+     a = np.clip(a.astype(np.int16) - 5, 0, 255).astype(np.uint8)
+     b = np.clip(b.astype(np.int16) + 10, 0, 255).astype(np.uint8)
+     merged = cv2.merge([l, a, b])
+     return cv2.cvtColor(merged, cv2.COLOR_LAB2RGB)


  def _funiegan_enhance(rgb: np.ndarray) -> Optional[np.ndarray]:
      weights_path = "weights/funiegan.onnx"
+     if not os.path.exists(weights_path):
+         return None
      try:
          net = cv2.dnn.readNetFromONNX(weights_path)
          h, w = rgb.shape[:2]
+         resized = cv2.resize(rgb, (256, 256)).astype(np.float32) / 127.5 - 1.0
          blob = cv2.dnn.blobFromImage(resized)
          net.setInput(blob)
          out = net.forward()
          out_img = ((out[0].transpose(1, 2, 0) + 1.0) * 127.5).clip(0, 255).astype(np.uint8)
          return cv2.resize(out_img, (w, h))
      except Exception as exc:
+         logger.warning("FUnIE-GAN inference failed (%s); falling back.", exc)
          return None


+ def enhance_image(raw_bytes: bytes, prefer_funiegan: bool = True) -> tuple[str, np.ndarray]:
      rgb = _bytes_to_array(raw_bytes)
+     enhanced = None
+     if prefer_funiegan:
+         enhanced = _funiegan_enhance(rgb)
      if enhanced is None:
          enhanced = _clahe_enhance(rgb)
+     return _array_to_base64(enhanced, fmt="JPEG"), rgb
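A minimal call sketch for the new signature (the frame.jpg path is hypothetical). Note that the second return value is the original RGB array, not the enhanced one, so downstream detection runs on unmodified pixels:

    enhanced_b64, original_rgb = enhance_image(open("frame.jpg", "rb").read())
    detections = run_detection(original_rgb)  # detection consumes the original frame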


+ # ------------------------- forensic heatmap --------------------------------
+ def build_heatmap(rgb: np.ndarray) -> str:
+     gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)
+     blurred = cv2.GaussianBlur(gray, (15, 15), 0)
+     try:
+         _, ssim_map = ssim(gray, blurred, full=True, data_range=255)
+     except Exception:
+         diff = cv2.absdiff(gray, blurred).astype(np.float32) / 255.0
+         ssim_map = 1.0 - diff
+     ssim_norm = ((ssim_map + 1.0) / 2.0 * 255.0).clip(0, 255).astype(np.uint8)
+     colormap = cv2.COLORMAP_RdYlGn if hasattr(cv2, "COLORMAP_RdYlGn") else cv2.COLORMAP_JET
+     heatmap_bgr = cv2.applyColorMap(ssim_norm, colormap)
+     rgb_bgr = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
+     overlay = cv2.addWeighted(rgb_bgr, 0.55, heatmap_bgr, 0.45, 0)
+     overlay_rgb = cv2.cvtColor(overlay, cv2.COLOR_BGR2RGB)
+     return _array_to_base64(overlay_rgb, fmt="PNG")
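build_heatmap returns a data URI in the format produced by _array_to_base64 ("data:image/png;base64,<payload>"). A small persistence sketch; save_data_uri is illustrative, not part of this module:

    import base64

    def save_data_uri(uri: str, path: str) -> None:
        # Split off the "data:image/png;base64," header and decode the payload.
        _, b64_payload = uri.split(",", 1)
        with open(path, "wb") as f:
            f.write(base64.b64decode(b64_payload))

    save_data_uri(build_heatmap(original_rgb), "heatmap.png")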
+
+
+ # ------------------------- detection helpers --------------------------------
+ def _local_texture_authenticity(patch: np.ndarray) -> float:
+     if patch is None or patch.size == 0:
+         return 0.0
+     gray = cv2.cvtColor(patch, cv2.COLOR_RGB2GRAY) if patch.ndim == 3 else patch
+     var = cv2.Laplacian(gray, cv2.CV_64F).var()
+     norm = (var - 10.0) / (200.0 - 10.0)
+     return float(np.clip(norm, 0.0, 1.0))
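A quick sanity sketch of the Laplacian-variance score above, using synthetic patches (illustrative, not a formal test): a flat patch has zero variance and lands at 0.0 after clipping, while dense noise saturates the 10-200 variance window and lands at 1.0:

    import numpy as np

    flat = np.full((64, 64, 3), 128, dtype=np.uint8)
    noisy = np.random.randint(0, 256, (64, 64, 3), dtype=np.uint8)
    print(_local_texture_authenticity(flat))   # 0.0 (no texture)
    print(_local_texture_authenticity(noisy))  # 1.0 (variance far above 200)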

+
+ # ---------------------- Groq runtime backend (placeholder) ------------------
+ def _run_detection_groq(rgb: np.ndarray, compiled_model_path: str, conf_thresh: float) -> List[Dict[str, Any]]:
+     """
+     Placeholder Groq runner. Replace with your project's Groq runtime/SDK calls.
+
+     Recommended flow (a normalizer sketch follows this function):
+       - import the Groq runtime installed in your environment (the API differs by Groq release)
+       - load the compiled artifact or use a long-lived runner
+       - prepare the input (resize / normalize) exactly as the compiled model expects
+       - run inference and parse the outputs into COCO-like detections:
+           [{"class": "person", "conf": 0.82, "bbox": [x1, y1, x2, y2]}, ...]
+     If the Groq runtime isn't installed, this function raises and the pipeline falls back.
      """
+     # Try to import a Groq runtime package (NAME VARIES). This is intentionally guarded.
      try:
+         # Example placeholder import; replace with your runtime import
+         import groq_runtime  # <<-- REPLACE with the actual Groq runtime package for your compiled model
      except Exception as exc:
+         raise RuntimeError("Groq runtime not installed") from exc
+
+     # PSEUDOCODE (replace with your actual runtime usage):
+     try:
+         # runner = groq_runtime.Runner(compiled_model_path)
+         # model_input = cv2.resize(rgb, (MODEL_W, MODEL_H)).astype(np.float32) / 255.0
+         # batch = np.expand_dims(model_input, axis=0)
+         # outputs = runner.run(batch)
+         # parse outputs -> parsed_detections
+         parsed_detections: List[Dict[str, Any]] = []
+         # -----> Replace the pseudocode above with real runtime calls and parsing
+         return parsed_detections
+     except Exception as exc:
+         raise RuntimeError("Groq model execution failed") from exc
+
+
+ # -------------------- Groq LLM refinement (optional) ------------------------
+ def refine_with_groq_llm(detections: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+     """
+     (Optional) Use a Groq LLM to refine/correct YOLO outputs (label mapping, merging boxes, etc.).
+     This function is intentionally conservative: if there is no GROQ_API_KEY or client, it returns the original detections.
+
+     To enable: install the Groq client/SDK for LLM usage and replace the body below
+     with a real call. Keep the function robust: always return a list of detections.
+     """
+     if not GROQ_API_KEY or not detections:
+         return detections
+
+     # >>> EXAMPLE (COMMENTED) - replace with your Groq LLM client usage <<<
+     # try:
+     #     import groq
+     #     client = groq.Groq(api_key=GROQ_API_KEY)
+     #     prompt = "You are a maritime analyst. Given these detections (JSON), correct labels and return a JSON list."
+     #     response = client.chat.completions.create(
+     #         model="llama-3-small", messages=[{"role": "user", "content": prompt + json.dumps(detections)}], temperature=0.2,
+     #     )
+     #     refined = json.loads(response.choices[0].message.content)
+     #     return refined
+     # except Exception as e:
+     #     logger.warning("Groq LLM refine failed: %s", e)
+     #     return detections
+
+     # By default, return unchanged (safe!)
+     return detections
+
+
+ # ------------------------- unified detection (Groq -> Ultralytics) ----------
+ def run_detection(rgb: np.ndarray,
+                   sonar_data: Optional[Dict[str, Any]] = None,
+                   conf_thresh: float = 0.40,
+                   allowed_only: Optional[List[str]] = None) -> List[Dict[str, Any]]:
+     """
+     Try the configured backend(s) and return enriched detection dicts:
+         {
+             "class": str,
+             "mapped_label": str,
+             "confidence": float,
+             "forensic_confidence": "HIGH|MEDIUM|LOW",
+             "bbox": [x1, y1, x2, y2],
+             "hallucinated": bool
+         }
+     """
+     allowed = set(allowed_only) if allowed_only else set(_LABEL_MAP.keys())
+     backend_choice = DETECTION_BACKEND
      model_path = os.getenv("DETECTION_MODEL", DEFAULT_DETECTION_MODEL)
+
+     # 1) Try the Groq compiled runtime if requested or on auto
+     if backend_choice in ("groq", "auto"):
+         try:
+             groq_dets = _run_detection_groq(rgb, model_path, conf_thresh)
+             if groq_dets:
+                 enriched: List[Dict[str, Any]] = []
+                 h, w = rgb.shape[:2]
+                 for d in groq_dets:
+                     cls_name = d.get("class", "unknown")
+                     conf = float(d.get("conf", 0.0))
+                     if conf < conf_thresh or cls_name not in allowed:
+                         continue
+                     x1, y1, x2, y2 = _ensure_int_box(d.get("bbox", [0, 0, 0, 0]))
+                     patch = rgb[y1:y2, x1:x2] if y2 > y1 and x2 > x1 else None
+                     texture = _local_texture_authenticity(patch)
+                     combined = 0.6 * conf + 0.4 * texture
+                     forensic = "HIGH" if combined > 0.75 else "MEDIUM" if combined > 0.55 else "LOW"
+                     hallucinated = (conf > 0.6 and texture < 0.25)
+                     enriched.append({
+                         "class": cls_name,
+                         "mapped_label": _LABEL_MAP.get(cls_name, cls_name),
+                         "confidence": round(conf, 4),
+                         "forensic_confidence": forensic,
+                         "bbox": [x1, y1, x2, y2],
+                         "hallucinated": hallucinated,
+                     })
+                 if enriched:
+                     # Optional LLM refine step (won't run unless GROQ_API_KEY & client are wired)
+                     return refine_with_groq_llm(enriched)
+         except Exception as exc:
+             logger.info("Groq backend not used: %s", exc)
+
+     # 2) Fall back to Ultralytics (YOLO)
      try:
+         from ultralytics import YOLO  # type: ignore
      except Exception as exc:
+         logger.warning("ultralytics not available (%s); detection disabled.", exc)
          return []

      try:
+         model = YOLO(model_path)
          results = model(rgb, verbose=False)
      except Exception as exc:
+         logger.warning("Ultralytics model load/inference failed (%s).", exc)
          return []

+     detections: List[Dict[str, Any]] = []
+     h, w = rgb.shape[:2]
      for result in results:
          boxes = getattr(result, "boxes", None)
          if boxes is None:
              continue
          for box in boxes:
              try:
                  conf = float(box.conf[0]) if hasattr(box.conf, "__len__") else float(box.conf)
                  if conf < conf_thresh:
                      continue
                  cls_id = int(box.cls[0]) if hasattr(box.cls, "__len__") else int(box.cls)
                  cls_name = model.names.get(cls_id, str(cls_id)) if hasattr(model, "names") else str(cls_id)
                  xyxy = box.xyxy[0] if hasattr(box.xyxy, "__len__") and len(box.xyxy) > 0 else None
                  if xyxy is None:
                      continue
+                 x1, y1, x2, y2 = (int(round(float(v))) for v in xyxy)
+                 if cls_name not in allowed:
+                     continue
+                 patch = rgb[y1:y2, x1:x2] if y2 > y1 and x2 > x1 else None
+                 texture_score = _local_texture_authenticity(patch)
+                 combined = 0.6 * conf + 0.4 * texture_score
+                 forensic = "HIGH" if combined > 0.75 else "MEDIUM" if combined > 0.55 else "LOW"
+                 hallucinated = (conf > 0.6 and texture_score < 0.25)
                  detections.append({
                      "class": cls_name,
                      "mapped_label": _LABEL_MAP.get(cls_name, cls_name),
                      "confidence": round(conf, 4),
+                     "forensic_confidence": forensic,
+                     "bbox": [x1, y1, x2, y2],
+                     "hallucinated": hallucinated,
                  })
              except Exception as exc:
+                 logger.debug("Skipping a box due to error: %s", exc)
                  continue

+     # Optional LLM refinement (a no-op unless you wire in the Groq LLM client)
+     detections = refine_with_groq_llm(detections)
+
+     # Sonar-guided hallucination placeholders when there are no vision detections
+     if sonar_data and not detections:
+         contours = sonar_data.get("contours", [])
+         for c in contours:
+             pts = []
+             for nx, ny in c:
+                 px = int(np.clip(nx, 0.0, 1.0) * w)
+                 py = int(np.clip(ny, 0.0, 1.0) * h)
+                 pts.append([px, py])
+             if len(pts) < 3:
+                 continue
+             pts_np = np.array(pts, dtype=np.int32)
+             x, y, ww, hh = cv2.boundingRect(pts_np)
+             detections.append({
+                 "class": "sonar_contact",
+                 "mapped_label": "Sonar Contact (hallucinated)",
+                 "confidence": 0.0,
+                 "forensic_confidence": "LOW",
+                 "bbox": [int(x), int(y), int(x + ww), int(y + hh)],
+                 "hallucinated": True,
+                 "sonar_polygon": pts,
+             })

+     return detections
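A usage sketch with hypothetical values; sonar contour points are (x, y) pairs normalized to [0, 1], matching the clipping logic above:

    rgb = _bytes_to_array(open("sweep_frame.jpg", "rb").read())
    sonar = {"contours": [[(0.40, 0.55), (0.48, 0.52), (0.46, 0.62)]]}
    for det in run_detection(rgb, sonar_data=sonar, conf_thresh=0.40):
        print(det["mapped_label"], det["confidence"], det["forensic_confidence"], det["bbox"])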
 
 

+ # -------------------- whisper-link / vector sketch --------------------------
+ def generate_vector_sketch(detections: List[Dict[str, Any]], max_bytes: int = 1024) -> str:
+     sketch = {"detections": []}
+     for d in detections:
+         x1, y1, x2, y2 = d.get("bbox", [0, 0, 0, 0])
+         w = max(1, x2 - x1)
+         h = max(1, y2 - y1)
+         cx = x1 + w / 2.0
+         cy = y1 + h / 2.0
+         sketch["detections"].append({
+             "label": d.get("mapped_label", d.get("class")),
+             "conf": float(d.get("confidence", 0.0)),
+             "center": [float(cx), float(cy)],
+             "size": [float(w), float(h)],
+             "hallucinated": bool(d.get("hallucinated", False)),
+         })
+     raw = json.dumps(sketch, separators=(",", ":"), ensure_ascii=False).encode("utf-8")
+     compressed = zlib.compress(raw, level=9)
+     if len(compressed) > max_bytes:
+         summary = {"summary": [{"label": x["label"], "conf": x["conf"]} for x in sketch["detections"]]}
+         compressed = zlib.compress(json.dumps(summary, separators=(",", ":")).encode("utf-8"), level=9)
+     return base64.b64encode(compressed).decode("utf-8")
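Receiver-side decoding is the inverse of the compress path above; a minimal sketch assuming the default JSON payload (decode_vector_sketch is not part of the module):

    import base64, json, zlib

    def decode_vector_sketch(b64: str) -> dict:
        return json.loads(zlib.decompress(base64.b64decode(b64)))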
+
+
+ # --------------------- sonar overlay / wireframe ---------------------------
+ def fuse_sonar_overlay(rgb: np.ndarray, sonar_data: Dict[str, Any]) -> str:
+     if not sonar_data:
+         return _array_to_base64(rgb, fmt="PNG")
+     h, w = rgb.shape[:2]
+     canvas = rgb.copy()
+     contours = sonar_data.get("contours", [])
+     for c in contours:
+         pts = []
+         for nx, ny in c:
+             px = int(np.clip(nx, 0.0, 1.0) * (w - 1))
+             py = int(np.clip(ny, 0.0, 1.0) * (h - 1))
+             pts.append([px, py])
+         if len(pts) < 2:
+             continue
+         pts_np = np.array(pts, dtype=np.int32)
+         cv2.polylines(canvas, [pts_np], isClosed=True, color=(255, 255, 0), thickness=2)
+         cv2.fillPoly(canvas, [pts_np], color=(40, 40, 40))
+     return _array_to_base64(canvas, fmt="PNG")
+
+
+ # --------------------------- SITREP helper ---------------------------------
+ def detections_to_sitrep_txt(detections: List[Dict[str, Any]]) -> str:
+     if not detections:
+         return ("SITUATION: Sensor sweep complete – no contacts.\n"
+                 "ASSESSMENT: Area clear.\n"
+                 "RECOMMENDATION: Continue routine patrol.")
+     labels = ", ".join({d["mapped_label"] for d in detections})
+     count = len(detections)
+     return (f"SITUATION: {count} contact(s) detected – {labels}.\n"
+             "ASSESSMENT: Requires manual review (forensic confidence noted).\n"
+             "RECOMMENDATION: Dispatch response team and maintain sensor lock.")
+
+
+ __all__ = [
+     "enhance_image",
+     "run_detection",
+     "build_heatmap",
+     "fuse_sonar_overlay",
+     "generate_vector_sketch",
+     "detections_to_sitrep_txt",
+ ]
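An end-to-end sketch of the exported API on a single frame (the file name is hypothetical; each call matches the signatures listed in the module docstring):

    raw = open("sample_underwater.jpg", "rb").read()
    enhanced_b64, rgb = enhance_image(raw)           # enhanced preview + original pixels
    detections = run_detection(rgb)                  # Groq first, Ultralytics fallback
    heatmap_b64 = build_heatmap(rgb)                 # SSIM forensic overlay
    sketch_b64 = generate_vector_sketch(detections)  # compressed contact summary
    print(detections_to_sitrep_txt(detections))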