Update processing.py
Browse files- processing.py +76 -42
processing.py
CHANGED
|
@@ -2,18 +2,20 @@
|
|
| 2 |
Image processing pipeline for SUB-SENTINEL.
|
| 3 |
|
| 4 |
Provides three functions:
|
| 5 |
-
enhance_image(raw_bytes)
|
| 6 |
-
run_detection(image_array)
|
| 7 |
-
build_heatmap(image_array)
|
| 8 |
|
| 9 |
All heavy-weight model paths gracefully fall back to CPU-friendly alternatives
|
| 10 |
-
when model weights are absent.
|
|
|
|
| 11 |
"""
|
| 12 |
|
| 13 |
-
import
|
| 14 |
import io
|
|
|
|
| 15 |
import logging
|
| 16 |
-
from typing import Optional
|
| 17 |
|
| 18 |
import cv2
|
| 19 |
import numpy as np
|
|
@@ -21,6 +23,13 @@ from PIL import Image
|
|
| 21 |
from skimage.metrics import structural_similarity as ssim
|
| 22 |
|
| 23 |
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
|
| 25 |
# ---------------------------------------------------------------------------
|
| 26 |
# Maritime label mapping for YOLOv8 COCO classes
|
|
@@ -35,13 +44,15 @@ _LABEL_MAP: dict[str, str] = {
|
|
| 35 |
}
|
| 36 |
|
| 37 |
|
|
|
|
| 38 |
def _array_to_base64(img_array: np.ndarray, fmt: str = "JPEG") -> str:
|
| 39 |
"""Convert a uint8 numpy array (H×W×C, RGB) to a base-64 data-URI string."""
|
| 40 |
pil_img = Image.fromarray(img_array.astype(np.uint8))
|
| 41 |
buf = io.BytesIO()
|
| 42 |
-
|
|
|
|
| 43 |
encoded = base64.b64encode(buf.getvalue()).decode("utf-8")
|
| 44 |
-
mime = "image/jpeg" if
|
| 45 |
return f"data:{mime};base64,{encoded}"
|
| 46 |
|
| 47 |
|
|
@@ -57,8 +68,6 @@ def _bytes_to_array(raw_bytes: bytes) -> np.ndarray:
|
|
| 57 |
# ---------------------------------------------------------------------------
|
| 58 |
# 1. Underwater image enhancement
|
| 59 |
# ---------------------------------------------------------------------------
|
| 60 |
-
|
| 61 |
-
|
| 62 |
def _clahe_enhance(rgb: np.ndarray) -> np.ndarray:
|
| 63 |
"""
|
| 64 |
CPU-friendly underwater enhancement using CLAHE on LAB colour space.
|
|
@@ -82,7 +91,6 @@ def _funiegan_enhance(rgb: np.ndarray) -> Optional[np.ndarray]:
|
|
| 82 |
"""
|
| 83 |
weights_path = "weights/funiegan.onnx"
|
| 84 |
try:
|
| 85 |
-
import os
|
| 86 |
if not os.path.exists(weights_path):
|
| 87 |
return None
|
| 88 |
net = cv2.dnn.readNetFromONNX(weights_path)
|
|
@@ -92,10 +100,11 @@ def _funiegan_enhance(rgb: np.ndarray) -> Optional[np.ndarray]:
|
|
| 92 |
blob = cv2.dnn.blobFromImage(resized)
|
| 93 |
net.setInput(blob)
|
| 94 |
out = net.forward()
|
|
|
|
| 95 |
out_img = ((out[0].transpose(1, 2, 0) + 1.0) * 127.5).clip(0, 255).astype(np.uint8)
|
| 96 |
return cv2.resize(out_img, (w, h))
|
| 97 |
except Exception as exc:
|
| 98 |
-
logger.warning("FUnIE-GAN inference failed (%s);
|
| 99 |
return None
|
| 100 |
|
| 101 |
|
|
@@ -115,68 +124,93 @@ def enhance_image(raw_bytes: bytes) -> tuple[str, np.ndarray]:
|
|
| 115 |
|
| 116 |
|
| 117 |
# ---------------------------------------------------------------------------
|
| 118 |
-
# 2. Object detection (
|
| 119 |
# ---------------------------------------------------------------------------
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
def run_detection(rgb: np.ndarray) -> list[dict]:
|
| 123 |
"""
|
| 124 |
-
Run
|
|
|
|
| 125 |
|
| 126 |
Returns a list of detection dicts:
|
| 127 |
{class, mapped_label, confidence, bbox: [x1, y1, x2, y2]}
|
| 128 |
"""
|
| 129 |
try:
|
| 130 |
-
|
| 131 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 132 |
results = model(rgb, verbose=False)
|
| 133 |
except Exception as exc:
|
| 134 |
-
logger.warning("
|
| 135 |
return []
|
| 136 |
|
| 137 |
-
detections = []
|
| 138 |
for result in results:
|
| 139 |
-
|
|
|
|
| 140 |
continue
|
| 141 |
-
for box in
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
"class": cls_name,
|
| 149 |
"mapped_label": _LABEL_MAP.get(cls_name, cls_name),
|
| 150 |
"confidence": round(conf, 4),
|
| 151 |
"bbox": [round(x1), round(y1), round(x2), round(y2)],
|
| 152 |
-
}
|
| 153 |
-
|
|
|
|
|
|
|
|
|
|
| 154 |
return detections
|
| 155 |
|
| 156 |
|
| 157 |
# ---------------------------------------------------------------------------
|
| 158 |
# 3. SSIM-based forensic heatmap
|
| 159 |
# ---------------------------------------------------------------------------
|
| 160 |
-
|
| 161 |
-
|
| 162 |
def build_heatmap(rgb: np.ndarray) -> str:
|
| 163 |
"""
|
| 164 |
Generate a forensic heatmap by comparing the original image against a
|
| 165 |
-
Gaussian-blurred reference.
|
| 166 |
-
|
| 167 |
-
Returns a base64-encoded PNG heatmap.
|
| 168 |
"""
|
| 169 |
gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)
|
| 170 |
-
# Reference: gently blurred version of the same frame
|
| 171 |
blurred = cv2.GaussianBlur(gray, (15, 15), 0)
|
| 172 |
|
| 173 |
-
# Compute SSIM score map
|
| 174 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
|
| 176 |
# Normalise to [0, 255]
|
| 177 |
-
ssim_norm = ((ssim_map + 1.0) / 2.0 * 255).clip(0, 255).astype(np.uint8)
|
| 178 |
|
| 179 |
-
# Map to BGR: low similarity
|
| 180 |
colormap = cv2.COLORMAP_RdYlGn if hasattr(cv2, "COLORMAP_RdYlGn") else cv2.COLORMAP_JET
|
| 181 |
heatmap_bgr = cv2.applyColorMap(ssim_norm, colormap)
|
| 182 |
|
|
@@ -185,4 +219,4 @@ def build_heatmap(rgb: np.ndarray) -> str:
|
|
| 185 |
overlay = cv2.addWeighted(rgb_bgr, 0.55, heatmap_bgr, 0.45, 0)
|
| 186 |
overlay_rgb = cv2.cvtColor(overlay, cv2.COLOR_BGR2RGB)
|
| 187 |
|
| 188 |
-
return _array_to_base64(overlay_rgb, fmt="PNG")
|
|
|
|
| 2 |
Image processing pipeline for SUB-SENTINEL.
|
| 3 |
|
| 4 |
Provides three functions:
|
| 5 |
+
enhance_image(raw_bytes) -> (base64_str, numpy_array)
|
| 6 |
+
run_detection(image_array) -> list[dict]
|
| 7 |
+
build_heatmap(image_array) -> base64_str
|
| 8 |
|
| 9 |
All heavy-weight model paths gracefully fall back to CPU-friendly alternatives
|
| 10 |
+
when model weights are absent. Use the environment variable DETECTION_MODEL
|
| 11 |
+
to override the default detection model (e.g. "yolov8m.pt" or a local path).
|
| 12 |
"""
|
| 13 |
|
| 14 |
+
import os
|
| 15 |
import io
|
| 16 |
+
import base64
|
| 17 |
import logging
|
| 18 |
+
from typing import Optional, List, Dict
|
| 19 |
|
| 20 |
import cv2
|
| 21 |
import numpy as np
|
|
|
|
| 23 |
from skimage.metrics import structural_similarity as ssim
|
| 24 |
|
| 25 |
logger = logging.getLogger(__name__)
|
| 26 |
+
logger.addHandler(logging.NullHandler())
|
| 27 |
+
|
| 28 |
+
# ---------------------------------------------------------------------------
|
| 29 |
+
# Default detection model (change via env var DETECTION_MODEL if needed)
|
| 30 |
+
# ---------------------------------------------------------------------------
|
| 31 |
+
# NOTE: default changed to yolov8m for improved accuracy.
|
| 32 |
+
DEFAULT_DETECTION_MODEL = os.getenv("DETECTION_MODEL", "yolov8m.pt")
|
| 33 |
|
| 34 |
# ---------------------------------------------------------------------------
|
| 35 |
# Maritime label mapping for YOLOv8 COCO classes
|
|
|
|
| 44 |
}
|
| 45 |
|
| 46 |
|
| 47 |
+
# --------------------------- utilities -------------------------------------
|
| 48 |
def _array_to_base64(img_array: np.ndarray, fmt: str = "JPEG") -> str:
    """Convert a uint8 numpy array (H×W×C, RGB) to a base-64 data-URI string."""
    out_format = fmt.upper()
    image = Image.fromarray(img_array.astype(np.uint8))
    # Encode in-memory; the buffer is discarded once the payload is captured.
    with io.BytesIO() as buffer:
        image.save(buffer, format=out_format, quality=90)
        payload = buffer.getvalue()
    b64_text = base64.b64encode(payload).decode("utf-8")
    media_type = "image/jpeg" if out_format == "JPEG" else "image/png"
    return f"data:{media_type};base64,{b64_text}"
|
| 57 |
|
| 58 |
|
|
|
|
| 68 |
# ---------------------------------------------------------------------------
|
| 69 |
# 1. Underwater image enhancement
|
| 70 |
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
| 71 |
def _clahe_enhance(rgb: np.ndarray) -> np.ndarray:
|
| 72 |
"""
|
| 73 |
CPU-friendly underwater enhancement using CLAHE on LAB colour space.
|
|
|
|
| 91 |
"""
|
| 92 |
weights_path = "weights/funiegan.onnx"
|
| 93 |
try:
|
|
|
|
| 94 |
if not os.path.exists(weights_path):
|
| 95 |
return None
|
| 96 |
net = cv2.dnn.readNetFromONNX(weights_path)
|
|
|
|
| 100 |
blob = cv2.dnn.blobFromImage(resized)
|
| 101 |
net.setInput(blob)
|
| 102 |
out = net.forward()
|
| 103 |
+
# out shape may be (1, C, H, W)
|
| 104 |
out_img = ((out[0].transpose(1, 2, 0) + 1.0) * 127.5).clip(0, 255).astype(np.uint8)
|
| 105 |
return cv2.resize(out_img, (w, h))
|
| 106 |
except Exception as exc:
|
| 107 |
+
logger.warning("FUnIE-GAN inference failed (%s); falling back to CLAHE.", exc)
|
| 108 |
return None
|
| 109 |
|
| 110 |
|
|
|
|
| 124 |
|
| 125 |
|
| 126 |
# ---------------------------------------------------------------------------
|
| 127 |
+
# 2. Object detection (YOLOv8 family; default is yolov8m.pt)
|
| 128 |
# ---------------------------------------------------------------------------
|
| 129 |
+
def run_detection(rgb: np.ndarray, conf_thresh: float = 0.30) -> List[dict]:
    """
    Run YOLO detection (model chosen by DETECTION_MODEL env var or default)
    and map labels to maritime terminology.

    Parameters
    ----------
    rgb : np.ndarray
        Input image as an RGB uint8 array (H×W×3) — assumed, TODO confirm at callers.
    conf_thresh : float
        Minimum confidence for a box to be reported.

    Returns a list of detection dicts:
        {class, mapped_label, confidence, bbox: [x1, y1, x2, y2]}

    Never raises: any failure (missing package, bad weights, inference error)
    is logged and yields an empty list.
    """
    try:
        # Lazy import to avoid heavy dependency cost at module import time
        from ultralytics import YOLO  # type: ignore
    except Exception as exc:
        logger.warning("ultralytics package not available (%s); detection disabled.", exc)
        return []

    model_path = os.getenv("DETECTION_MODEL", DEFAULT_DETECTION_MODEL)

    # Cache loaded models on the function object so repeated calls (e.g. per
    # video frame) do not reload weights from disk every time.
    cache = getattr(run_detection, "_model_cache", None)
    if cache is None:
        cache = {}
        run_detection._model_cache = cache

    model = cache.get(model_path)
    if model is None:
        try:
            model = YOLO(model_path)
        except Exception as exc:
            logger.warning("Failed to load detection model '%s' (%s). Returning empty.", model_path, exc)
            return []
        cache[model_path] = model

    try:
        # Model accepts numpy image (RGB) directly
        results = model(rgb, verbose=False)
    except Exception as exc:
        logger.warning("Model inference failed (%s). Returning empty.", exc)
        return []

    detections: List[dict] = []
    for result in results:
        boxes = getattr(result, "boxes", None)
        if boxes is None:
            continue
        for box in boxes:
            try:
                # Defensive extraction: the ultralytics API returns tensors/arrays
                conf = float(box.conf[0]) if hasattr(box.conf, "__len__") else float(box.conf)
                if conf < conf_thresh:
                    continue

                cls_id = int(box.cls[0]) if hasattr(box.cls, "__len__") else int(box.cls)
                cls_name = model.names.get(cls_id, str(cls_id)) if hasattr(model, "names") else str(cls_id)

                xyxy = box.xyxy[0] if hasattr(box.xyxy, "__len__") and len(box.xyxy) > 0 else None
                if xyxy is None:
                    continue
                x1, y1, x2, y2 = (float(v) for v in xyxy)
                detections.append({
                    "class": cls_name,
                    "mapped_label": _LABEL_MAP.get(cls_name, cls_name),
                    "confidence": round(conf, 4),
                    "bbox": [round(x1), round(y1), round(x2), round(y2)],
                })
            except Exception as exc:
                logger.debug("Skipping box due to extraction error: %s", exc)
                continue

    return detections
|
| 188 |
|
| 189 |
|
| 190 |
# ---------------------------------------------------------------------------
|
| 191 |
# 3. SSIM-based forensic heatmap
|
| 192 |
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
| 193 |
def build_heatmap(rgb: np.ndarray) -> str:
    """
    Generate a forensic heatmap by comparing the original image against a
    Gaussian-blurred reference. High SSIM -> green; low SSIM -> red.

    Parameters
    ----------
    rgb : np.ndarray
        Input image as an RGB uint8 array (H×W×3) — assumed, TODO confirm at callers.

    Returns a base64-encoded PNG heatmap (data URI).
    """
    gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)
    # Reference: gently blurred version of the same frame
    blurred = cv2.GaussianBlur(gray, (15, 15), 0)

    # Compute SSIM score map; fallback to simple difference if it fails.
    # Either branch must yield values in [-1, 1] so the shared normalisation
    # below maps the full similarity range onto [0, 255].
    try:
        _, ssim_map = ssim(gray, blurred, full=True, data_range=255)
    except Exception as exc:
        logger.warning("SSIM computation failed (%s); falling back to absdiff.", exc)
        diff = cv2.absdiff(gray, blurred).astype(np.float32)
        # Rescale so zero difference -> +1 and maximal difference -> -1,
        # matching the SSIM value range (plain 1 - diff/255 would land in
        # [0, 1] and get squeezed into the upper half of the colormap).
        ssim_map = 1.0 - 2.0 * (diff / 255.0)

    # Normalise to [0, 255]
    ssim_norm = ((ssim_map + 1.0) / 2.0 * 255.0).clip(0, 255).astype(np.uint8)

    # Map to BGR: low similarity -> red, high -> green
    colormap = cv2.COLORMAP_RdYlGn if hasattr(cv2, "COLORMAP_RdYlGn") else cv2.COLORMAP_JET
    heatmap_bgr = cv2.applyColorMap(ssim_norm, colormap)

    # Blend the heatmap over the original frame for easier visual inspection.
    rgb_bgr = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
    overlay = cv2.addWeighted(rgb_bgr, 0.55, heatmap_bgr, 0.45, 0)
    overlay_rgb = cv2.cvtColor(overlay, cv2.COLOR_BGR2RGB)

    return _array_to_base64(overlay_rgb, fmt="PNG")
|