File size: 12,137 Bytes

462b9b8
e5b48cb
462b9b8
e5b48cb
 
 
462b9b8
e5b48cb
 
 
 
 
 
 
 
462b9b8
e5b48cb
 
462b9b8
 
 
 
 
2a11bfb
462b9b8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2b37f4a
 
462b9b8
2b37f4a
 
 
462b9b8
2b37f4a
 
 
 
 
 
 
 
 
 
462b9b8
2b37f4a
 
 
462b9b8
2b37f4a
462b9b8
2b37f4a
 
 
462b9b8
2b37f4a
 
 
462b9b8
 
2b37f4a
 
 
462b9b8
2b37f4a
 
 
462b9b8
2b37f4a
462b9b8
2b37f4a
 
 
 
 
 
462b9b8
2b37f4a
 
 
 
 
 
 
 
 
 
 
 
 
462b9b8
2b37f4a
 
 
 
 
 
2a11bfb
 
 
 
 
2b37f4a
 
 
 
 
2a11bfb
 
 
 
 
 
 
 
2b37f4a
 
2a11bfb
 
 
2b37f4a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2a11bfb
2b37f4a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2a11bfb
 
 
 
 
 
 
2b37f4a
 
 
 
 
 
2a11bfb
2b37f4a
 
 
 
 
 
 
2a11bfb
2b37f4a
 
 
 
 
2a11bfb
 
 
2b37f4a
 
 
 
 
 
 
 
 
 
 
2a11bfb
462b9b8
 
2b37f4a
 
 
462b9b8
2b37f4a
2a11bfb
2b37f4a
462b9b8
2b37f4a
 
462b9b8
2b37f4a
 
462b9b8
2b37f4a
 
462b9b8
2a11bfb
2b37f4a
 
 
 
 
2a11bfb
462b9b8
2a11bfb
 
2b37f4a
462b9b8
2b37f4a
 
462b9b8
2b37f4a
 
 
462b9b8
2a11bfb
 
 
 
 
 
2b37f4a
 
2a11bfb
462b9b8
2b37f4a
 
 
 
 
462b9b8
2a11bfb
 
2b37f4a
 
 
 
462b9b8
2b37f4a

"""
Inference script for TAS-BB-v1 (Tasmania MEWC Species Classifier)

MEWC (Mega Efficient Wildlife Classifier) for Tasmania trained on 2.5 million labelled
images from 96 classes. Includes all non-volant terrestrial mammals (native and introduced)
and 50+ commonly observed bird species. Overall accuracy and F1 scores exceed 99%.

Model: Tasmania MEWC Ensemble
Input: 384x384 RGB images
Framework: Keras 3 with JAX backend (EfficientNet v2 Small architecture)
Classes: 96 Tasmanian terrestrial mammals and birds
Developer: Barry Brook (University of Tasmania)
Citation: https://ecoevorxiv.org/repository/view/6405/
License: CC BY 4.0
Info: https://github.com/zaandahl/mewc

Author: Peter van Lunteren
Created: 2026-01-14
"""

from __future__ import annotations

import os
import sys
from pathlib import Path

# Select the JAX backend for Keras (as per original MEWC code).
# Keras reads KERAS_BACKEND when the package is first imported, so this
# assignment has to run BEFORE anything imports keras; setting it after the
# import has no effect. That is why the third-party imports sit below it.
os.environ["KERAS_BACKEND"] = "jax"

import cv2  # noqa: E402
import numpy as np  # noqa: E402
import tensorflow as tf  # noqa: E402
import yaml  # noqa: E402
from keras import saving  # noqa: E402
from PIL import Image, ImageFile  # noqa: E402

# Don't freak out over truncated images
ImageFile.LOAD_TRUNCATED_IMAGES = True


class ModelInference:
    """MEWC-Keras inference implementation for Tasmania species classifier."""

    def __init__(self, model_dir: Path, model_path: Path):
        """
        Initialize with model paths.

        Args:
            model_dir: Directory containing model files (including class_list.yaml)
            model_path: Path to tas_ens_mewc.keras file
        """
        self.model_dir = model_dir
        self.model_path = model_path
        self.model = None
        self.img_size = 384  # MEWC uses 384x384 images
        self.class_map: dict[str, str] | None = None
        self.class_ids: list[str] | None = None

    def check_gpu(self) -> bool:
        """
        Check GPU availability for TensorFlow/Keras inference.

        TensorFlow can detect GPUs, Metal (Apple Silicon), and CUDA.

        Returns:
            True if GPU available, False otherwise
        """
        try:
            gpus = tf.config.list_logical_devices('GPU')
            return len(gpus) > 0
        except Exception:
            return False

    def load_model(self) -> None:
        """
        Load Keras classification model into memory.

        This function is called once during worker initialization.
        The model is stored in self.model and reused for all subsequent
        classification requests.

        Also loads the class_list.yaml file which maps class indices to species names.

        Raises:
            RuntimeError: If model loading fails
            FileNotFoundError: If model_path or class_list.yaml is invalid
        """
        if not self.model_path.exists():
            raise FileNotFoundError(f"Model file not found: {self.model_path}")

        # Load the Keras model (without compilation for inference only)
        try:
            self.model = saving.load_model(str(self.model_path), compile=False)
        except Exception as e:
            raise RuntimeError(f"Failed to load Keras model from {self.model_path}: {e}") from e

        # Load class_list.yaml
        class_list_path = self.model_dir / "class_list.yaml"
        if not class_list_path.exists():
            raise FileNotFoundError(
                f"class_list.yaml not found: {class_list_path}\n"
                f"MEWC models require class_list.yaml in the model directory."
            )

        try:
            with open(class_list_path, 'r') as f:
                self.class_map = yaml.safe_load(f)
        except Exception as e:
            raise RuntimeError(f"Failed to load class_list.yaml: {e}") from e

        # The YAML can be formatted as either {int_str: species_name} or {species_name: int}
        # IMPORTANT: The model was trained using LEXICOGRAPHIC sorting of YAML keys!
        # This means '10' comes before '2' in the sorted order, which creates a specific
        # class ordering that the model learned during training. We MUST use the same
        # lexicographic sort to match the model's expectations.

        # Check if keys are numeric (int:label format) or string (label:int format)
        formatted_int_label = self._can_all_keys_be_converted_to_int(self.class_map)

        if formatted_int_label:
            # Format: {'0': 'species1', '1': 'species2', ...}
            # Sort keys LEXICOGRAPHICALLY (as strings) to match model training
            # This creates: ['0', '1', '10', '100', '108', '11', '117', '118', '12', '13', '14', ...]
            inv_class = {v: k for k, v in self.class_map.items()}
            yaml_keys_sorted = sorted(inv_class.values())  # Lexicographic sort on string keys

            # Build dense list: model_output[i] → class_ids[i] = species at yaml_keys_sorted[i]
            self.class_ids = [self.class_map[yaml_key] for yaml_key in yaml_keys_sorted]
        else:
            # Format: {"species1": 0, "species2": 1, ...}
            # Invert to create list where list[i] = species
            inv_class = {v: k for k, v in self.class_map.items()}
            self.class_ids = [inv_class[i] for i in sorted(inv_class.keys())]

    def _can_all_keys_be_converted_to_int(self, d: dict) -> bool:
        """
        Check if all dictionary keys can be converted to integers.

        Used to determine class_list.yaml format.

        Args:
            d: Dictionary to check

        Returns:
            True if all keys are convertible to int, False otherwise
        """
        for key in d.keys():
            try:
                int(key)
            except ValueError:
                return False
        return True

    def get_crop(
        self, image: Image.Image, bbox: tuple[float, float, float, float]
    ) -> Image.Image | None:
        """
        Crop image using MEWC-specific preprocessing.

        This cropping method is used by MEWC and follows the MegaDetector
        visualization_utils approach. It:
        1. Denormalizes the bbox coordinates
        2. Clips to image boundaries
        3. Returns the cropped region (no padding or squaring)

        Reference: https://github.com/zaandahl/mewc-snip/blob/main/src/mewc_snip.py#L29
        Reference: https://github.com/agentmorris/MegaDetector/blob/main/megadetector/visualization/visualization_utils.py#L352

        Args:
            image: PIL Image (full resolution)
            bbox: Normalized bounding box (x, y, width, height) in range [0.0, 1.0]

        Returns:
            Cropped PIL Image, or None if bbox is invalid

        Raises:
            None - Returns None for invalid boxes (graceful degradation)
        """
        x1, y1, w_box, h_box = bbox

        # Check for invalid bounding boxes (zero or negative dimensions)
        if w_box <= 0 or h_box <= 0:
            print(f"[TAS get_crop] Rejecting bbox with zero/negative dims: w={w_box}, h={h_box}", file=sys.stderr, flush=True)
            return None

        # Convert normalized coordinates to pixel coordinates
        ymin, xmin, ymax, xmax = y1, x1, y1 + h_box, x1 + w_box
        im_width, im_height = image.size

        # Denormalize
        left = xmin * im_width
        right = xmax * im_width
        top = ymin * im_height
        bottom = ymax * im_height

        # Clip to image boundaries (ensure non-negative)
        left = max(left, 0)
        right = max(right, 0)
        top = max(top, 0)
        bottom = max(bottom, 0)

        # Clip to image boundaries (ensure within image)
        left = min(left, im_width - 1)
        right = min(right, im_width - 1)
        top = min(top, im_height - 1)
        bottom = min(bottom, im_height - 1)

        # Final check - ensure crop has valid dimensions
        crop_width = right - left
        crop_height = bottom - top

        if crop_width <= 0 or crop_height <= 0:
            print(
                f"[TAS get_crop] Rejecting bbox after clipping - crop size {crop_width:.1f}x{crop_height:.1f}\n"
                f"  Original bbox: x={x1:.4f}, y={y1:.4f}, w={w_box:.4f}, h={h_box:.4f}\n"
                f"  Image size: {im_width}x{im_height}\n"
                f"  Pixel coords after clip: ({left:.1f},{top:.1f}) to ({right:.1f},{bottom:.1f})",
                file=sys.stderr, flush=True
            )
            return None

        # Crop image
        image_cropped = image.crop((left, top, right, bottom))
        return image_cropped

    def get_classification(self, crop: Image.Image) -> list[list[str, float]]:
        """
        Run MEWC-Keras classification on cropped image.

        Workflow:
        1. Convert PIL Image to numpy array
        2. Resize to 384x384 (MEWC input size)
        3. Run model prediction
        4. Return all class probabilities (unsorted - worker handles sorting)

        Args:
            crop: Cropped PIL Image

        Returns:
            List of [class_name, confidence] lists for ALL classes, in model order.
            Example: [["unknown_animal", 0.00234], ["tasmanian_pademelon", 0.50674], ...]
            NOTE: Sorting by confidence is handled by classification_worker.py

        Raises:
            RuntimeError: If model not loaded or inference fails
        """
        if self.model is None:
            raise RuntimeError("Model not loaded - call load_model() first")

        if self.class_ids is None:
            raise RuntimeError("Class IDs not loaded - call load_model() first")

        if crop is None:
            print("[TAS get_classification] Received None crop, returning empty", file=sys.stderr, flush=True)
            return []

        try:
            # Convert PIL Image to numpy array
            img = np.array(crop)

            if img.size == 0:
                print("[TAS get_classification] Zero-size numpy array, returning empty", file=sys.stderr, flush=True)
                return []

            # Resize to MEWC input size (384x384)
            img = cv2.resize(img, (self.img_size, self.img_size))

            # Add batch dimension
            img = np.expand_dims(img, axis=0)

            # Run prediction (verbose=0 suppresses progress bar)
            pred = self.model.predict(img, verbose=0)[0]

            # Build list of [class_name, confidence] pairs (as lists, not tuples!)
            # class_ids is already in the correct order from class_list.yaml
            classifications = []
            for i in range(len(pred)):
                class_name = self.class_ids[i]  # Get species name from class_list.yaml
                confidence = float(pred[i])
                classifications.append([class_name, confidence])

            # NOTE: Sorting by confidence is handled by classification_worker.py
            # Model developers don't need to sort - just return all class predictions
            return classifications

        except Exception as e:
            raise RuntimeError(f"MEWC-Keras classification failed: {e}") from e

    def get_class_names(self) -> dict[str, str]:
        """
        Get mapping of class IDs to species names from class_list.yaml.

        Returns a 1-indexed contiguous mapping that matches the model's output order.
        The model was trained with lexicographic sorting of YAML keys, so we create
        a simple 1-indexed mapping: {1: species_at_position_0, 2: species_at_position_1, ...}

        This matches the MegaDetector JSON format and the original MEWC implementation.

        Returns:
            Dict mapping class ID (1-indexed string) to species name
            Example: {"1": "unknown_animal", "2": "tasmanian_pademelon", ..., "10": "fallow_deer", ...}

        Raises:
            RuntimeError: If class_ids not loaded
        """
        if self.class_ids is None:
            raise RuntimeError("Class IDs not loaded - call load_model() first")

        # Build 1-indexed mapping: model position i → JSON ID str(i+1)
        # class_ids[0] → "1", class_ids[1] → "2", ..., class_ids[9] → "10" (fallow_deer)
        class_names = {}
        for i, class_name in enumerate(self.class_ids):
            class_id_str = str(i + 1)  # 1-indexed
            class_names[class_id_str] = class_name

        return class_names