Commit 1d08579
Parent(s): none (initial commit)
Initial commit with code files
Files changed:
- .gitattributes  +36 -0
- 1.png  +3 -0
- 2.png  +3 -0
- README.md  +40 -0
- __pycache__/yolov10_RoIFX.cpython-310.pyc  +0 -0
- app.py  +240 -0
- models/finetune.pt  +3 -0
- models/vanilla.pt  +3 -0
- requirements.txt  +7 -0
- yolov10_RoIFX.py  +496 -0
.gitattributes
ADDED
@@ -0,0 +1,36 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
*.png filter=lfs diff=lfs merge=lfs -text
1.png
ADDED
(binary image, stored via Git LFS)

2.png
ADDED
(binary image, stored via Git LFS)
README.md
ADDED
@@ -0,0 +1,40 @@
# YOLOv10 Saliency Heat-map Visualiser

This Gradio app demonstrates object detection and saliency visualization using YOLOv10 models trained on the VOC dataset. The app allows users to:

1. Choose between vanilla and finetuned YOLOv10 models
2. Upload custom images or use provided examples
3. Visualize object detections with bounding boxes
4. See saliency heat-maps for each detected object

## Models

- **Vanilla VOC**: Base YOLOv10 model trained on the VOC dataset
- **Finetune VOC**: Fine-tuned YOLOv10 model with enhanced performance

## Features

- Interactive web interface
- Real-time object detection
- Saliency heat-map generation
- Adjustable confidence threshold
- Example images included

## Usage

1. Select a model from the dropdown menu
2. Upload an image or use one of the example images
3. Adjust the confidence threshold if needed
4. View the detection results and saliency heat-maps

## Technical Details

The app uses:
- Gradio for the web interface
- YOLOv10 for object detection
- Custom feature extraction for saliency visualization
- OpenCV for image processing

## Examples

The app includes two example images demonstrating the capabilities of the vanilla model.
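To try the interface locally rather than on the Space, one option is a minimal sketch like the following; it assumes the repository has been cloned with the requirements installed, and relies on the fact that importing app triggers the model/example downloads defined in that file:

    from app import build_demo  # downloads models and example images on import

    demo = build_demo()
    demo.launch()  # serves the Gradio UI on localhost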
__pycache__/yolov10_RoIFX.cpython-310.pyc
ADDED
Binary file (12.8 kB)
app.py
ADDED
@@ -0,0 +1,240 @@
import functools
import cv2
import numpy as np
import gradio as gr
import torch
import os
from types import MethodType
from ultralytics import YOLO
from huggingface_hub import hf_hub_download

# Import helper functions from the existing feature-extractor script
from yolov10_RoIFX import (
    _predict_once,
    get_result_with_features_yolov10_simple,
    draw_modern_bbox,
    draw_feature_heatmap,
)
# ---------------------------
# Constants & Setup
# ---------------------------

# Set up model and example paths
REPO_ID = "HugoHE/X-YOLOv10"
MODELS_DIR = "models"
os.makedirs(MODELS_DIR, exist_ok=True)

# Download models from Hugging Face Hub
def download_models():
    for model_file in ["vanilla.pt", "finetune.pt"]:
        if not os.path.exists(os.path.join(MODELS_DIR, model_file)):
            try:
                hf_hub_download(
                    repo_id=REPO_ID,
                    filename=f"models/{model_file}",
                    local_dir=".",
                    local_dir_use_symlinks=False
                )
            except Exception as e:
                print(f"Error downloading {model_file}: {e}")

# Download example images from Hugging Face Hub
def download_examples():
    for img_file in ["1.png", "2.png"]:
        if not os.path.exists(img_file):
            try:
                hf_hub_download(
                    repo_id=REPO_ID,
                    filename=img_file,
                    local_dir=".",
                    local_dir_use_symlinks=False
                )
            except Exception as e:
                print(f"Error downloading {img_file}: {e}")

# Download required files
download_models()
download_examples()
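As an aside, the same files could be fetched in one call with huggingface_hub's snapshot_download; this is a sketch of an alternative, not what the app actually does:

    from huggingface_hub import snapshot_download

    # Grab the two checkpoints and both example images in a single pass.
    snapshot_download(
        repo_id="HugoHE/X-YOLOv10",
        allow_patterns=["models/*.pt", "*.png"],
        local_dir=".",
    )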
AVAILABLE_MODELS = {
    "Vanilla VOC": "vanilla.pt",
    "Finetune VOC": "finetune.pt"
}

# Example images with their descriptions
EXAMPLES = [
    ["1.png", "Vanilla VOC", 0.25],
    ["2.png", "Vanilla VOC", 0.25]
]
# ---------------------------
# Model loading & caching
# ---------------------------

# Cached at module level so repeated dropdown selections reuse the loaded
# model; an lru_cache created inside the function body would be rebuilt on
# every call and never actually cache anything.
@functools.lru_cache(maxsize=2)
def load_model(model_name: str):
    """Load a YOLOv10 model and cache it so subsequent calls are fast."""
    model_path = os.path.join(MODELS_DIR, AVAILABLE_MODELS[model_name])
    model = YOLO(model_path)
    # Monkey-patch the predictor so we can extract feature maps on demand
    model.model._predict_once = MethodType(_predict_once, model.model)
    # Run a dummy inference to initialise internals
    model(np.zeros((640, 640, 3)), verbose=False)

    # Automatically determine which layers to use for feature extraction
    detect_layer_idx = -1
    for i, m in enumerate(model.model.model):
        if "Detect" in type(m).__name__:
            detect_layer_idx = i
            break
    if detect_layer_idx != -1:
        input_layer_idxs = model.model.model[detect_layer_idx].f
        embed_layers = sorted(input_layer_idxs) + [detect_layer_idx]
    else:
        embed_layers = [16, 19, 22, 23]  # fallback

    return model, tuple(embed_layers)
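Since the cache lives on load_model itself, repeated calls with the same dropdown value reuse the loaded model. A quick sanity check, assuming the checkpoints were downloaded above:

    model_a, embed_layers = load_model("Vanilla VOC")
    model_b, _ = load_model("Vanilla VOC")
    assert model_a is model_b  # second call is served from the lru_cache
    print(embed_layers)        # the Detect layer's input indices plus the Detect index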
# ---------------------------
# Composite heat-map layout
# ---------------------------

def generate_heatmap_layout(img_rgb: np.ndarray, model_name: str, conf: float = 0.25):
    """Return a composite saliency layout image for a given input image & model."""

    model, embed_layers = load_model(model_name)

    # Convert RGB (Gradio default) ➜ BGR (OpenCV default)
    img_bgr = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2BGR)

    # Run detection + feature extraction
    results = get_result_with_features_yolov10_simple(
        model, img_bgr, embed_layers, conf=conf
    )

    if not results or len(results) == 0 or not hasattr(results[0], "boxes"):
        return img_rgb  # nothing detected, return original

    result = results[0]
    if len(result.boxes) == 0:
        return img_rgb

    num_objects = len(result.boxes)

    # -------------- Step-1: main image with bboxes --------------
    main_img = img_bgr.copy()
    names = [model.model.names[int(cls)] for cls in result.boxes.cls]
    palette = [
        (71, 224, 253),
        (159, 128, 255),
        (159, 227, 128),
        (255, 191, 0),
        (255, 165, 0),
        (255, 0, 255),
    ]
    for i in range(num_objects):
        lbl = f"{names[i]} {result.boxes.conf[i]:.2f}"
        draw_modern_bbox(main_img, result.boxes.xyxy[i].cpu().numpy(), lbl, palette[i % len(palette)])

    # -------------- Step-2: heat-map snippets ------------------
    snippets = []
    if hasattr(result, "pooled_feats") and result.pooled_feats:
        last_pooled = result.pooled_feats[-1]
        for i in range(num_objects):
            box = result.boxes.xyxy[i]
            fmap = last_pooled[i]
            heatmap_full = draw_feature_heatmap(img_bgr.copy(), box, fmap)
            x1, y1, x2, y2 = box.cpu().numpy().astype(int)
            x1, y1 = max(0, x1), max(0, y1)
            x2, y2 = min(img_bgr.shape[1], x2), min(img_bgr.shape[0], y2)
            if x2 <= x1 or y2 <= y1:
                continue
            snippet = heatmap_full[y1:y2, x1:x2]

            # Add a small caption under each snippet
            caption = f"Obj #{i}: {names[i]}"
            font = cv2.FONT_HERSHEY_SIMPLEX
            (tw, th), _ = cv2.getTextSize(caption, font, 0.6, 1)
            canvas = np.full((snippet.shape[0] + th + 15, max(snippet.shape[1], tw + 10), 3), 255, np.uint8)
            # center the snippet
            cx = (canvas.shape[1] - snippet.shape[1]) // 2
            canvas[0 : snippet.shape[0], cx : cx + snippet.shape[1]] = snippet
            # put caption
            tx = (canvas.shape[1] - tw) // 2
            cv2.putText(canvas, caption, (tx, snippet.shape[0] + th + 5), font, 0.6, (0, 0, 0), 1, cv2.LINE_AA)
            cv2.rectangle(canvas, (0, 0), (canvas.shape[1] - 1, canvas.shape[0] - 1), (180, 180, 180), 1)
            snippets.append(canvas)

    if not snippets:
        # just return the main image if no heatmaps were produced
        return cv2.cvtColor(main_img, cv2.COLOR_BGR2RGB)

    # -------------- Step-3: assemble composite canvas ----------
    main_h, main_w = main_img.shape[:2]
    pad = 20
    row_h = max(s.shape[0] for s in snippets)
    total_row_w = sum(s.shape[1] for s in snippets) + (len(snippets) - 1) * 10

    row_canvas = np.full((row_h, total_row_w, 3), 255, np.uint8)
    cur_x = 0
    for s in snippets:
        h, w = s.shape[:2]
        y_off = (row_h - h) // 2
        row_canvas[y_off : y_off + h, cur_x : cur_x + w] = s
        cur_x += w + 10

    canvas_h = main_h + row_h + 3 * pad
    canvas_w = max(main_w, total_row_w) + 2 * pad
    final = np.full((canvas_h, canvas_w, 3), 255, np.uint8)

    # paste main image (top-center)
    x_main = (canvas_w - main_w) // 2
    final[pad : pad + main_h, x_main : x_main + main_w] = main_img

    # paste snippets row (bottom-center)
    x_row = (canvas_w - total_row_w) // 2
    final[main_h + 2 * pad : main_h + 2 * pad + row_h, x_row : x_row + total_row_w] = row_canvas

    # convert back to RGB for display
    return cv2.cvtColor(final, cv2.COLOR_BGR2RGB)
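The layout function can also be driven without Gradio; it expects an RGB array and returns one. A sketch with a hypothetical input file test.jpg:

    import cv2

    img_bgr = cv2.imread("test.jpg")                    # hypothetical local image
    img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)  # match the Gradio convention
    layout = generate_heatmap_layout(img_rgb, "Vanilla VOC", conf=0.25)
    cv2.imwrite("layout.png", cv2.cvtColor(layout, cv2.COLOR_RGB2BGR))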
# ---------------------------
# Gradio UI definition
# ---------------------------

def build_demo():
    image_input = gr.Image(type="numpy", label="Input Image")
    model_input = gr.Dropdown(
        choices=list(AVAILABLE_MODELS.keys()),
        value=list(AVAILABLE_MODELS.keys())[0],
        label="Select Model"
    )
    conf_input = gr.Slider(minimum=0.05, maximum=1.0, step=0.05, value=0.25, label="Confidence Threshold")
    outputs = gr.Image(type="numpy", label="Saliency Heat-map Layout")

    demo = gr.Interface(
        fn=generate_heatmap_layout,
        inputs=[image_input, model_input, conf_input],
        outputs=outputs,
        title="YOLOv10 Saliency Heat-map Visualiser",
        description="Select a model (vanilla-voc or finetune-voc) and upload an image. The app will overlay bounding boxes and generate saliency heat-maps for each detected object.",
        examples=EXAMPLES,
        cache_examples=True,
    )
    return demo


def main():
    demo = build_demo()
    demo.launch()


if __name__ == "__main__":
    main()
models/finetune.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:71e09e27011f99e9a34df19be89a4ffb0167790871c23e6549c24ddec194cbba
size 98072713
models/vanilla.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:823126b7af91ebf5ca4a5926a94e10a32c3e95981f264809245d9ba7b197be0c
size 65543615
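Both .pt entries are Git LFS pointer files rather than the weights themselves; the three key/value lines above are the entire format. A small sketch that reads such a pointer (parse_lfs_pointer is a hypothetical helper, not part of this repo):

    def parse_lfs_pointer(path):
        """Parse a git-lfs pointer file into a {'version', 'oid', 'size'} dict."""
        fields = {}
        with open(path) as fh:
            for line in fh:
                key, _, value = line.strip().partition(" ")
                fields[key] = value
        fields["size"] = int(fields["size"])  # e.g. 65543615 for models/vanilla.pt
        return fields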
requirements.txt
ADDED
@@ -0,0 +1,7 @@
gradio>=4.0.0
ultralytics>=8.0.0
opencv-python-headless>=4.8.0
numpy>=1.24.0
torch>=2.0.0
torchvision>=0.15.0
huggingface-hub>=0.20.0
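A sketch for checking these pins at runtime with the standard library (assumes Python 3.10+, where importlib.metadata accepts the normalized distribution names used below):

    from importlib.metadata import version, PackageNotFoundError

    for pkg in ["gradio", "ultralytics", "opencv-python-headless",
                "numpy", "torch", "torchvision", "huggingface-hub"]:
        try:
            print(pkg, version(pkg))
        except PackageNotFoundError:
            print(pkg, "NOT INSTALLED")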
yolov10_RoIFX.py
ADDED
@@ -0,0 +1,496 @@
# -*- coding: utf-8 -*-
"""
YOLOv10 Single Object Feature Extractor

This script extracts features for a specific detected object by its index.
It can be used to build feature databases or for targeted object analysis.
"""

from ultralytics import YOLO
from ultralytics.utils.ops import xywh2xyxy, scale_boxes
from ultralytics.engine.results import Results
from ultralytics.utils.plotting import feature_visualization  # needed by the visualize branch below
import torch
import time
from torch.nn.functional import cosine_similarity
import cv2
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
import urllib.request
import argparse
import json

from torchvision.ops import RoIAlign as ROIAlign
import torch.nn as nn
import torch.nn.functional as F

from types import MethodType
import torchvision
import collections


# Monkey patch method to get feature maps
def _predict_once(self, x, profile=False, visualize=False, embed=None):
    y, dt, embeddings = [], [], []  # outputs
    for m in self.model:
        if m.f != -1:  # if not from previous layer
            x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f]  # from earlier layers
        if profile:
            self._profile_one_layer(m, x, dt)
        x = m(x)  # run
        y.append(x if m.i in self.save else None)  # save output
        if visualize:
            feature_visualization(x, m.type, m.i, save_dir=visualize)

        if embed and m.i in embed:
            embeddings.append(x)
            if m.i == max(embed):
                return embeddings
    return x
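The embed argument is the hook the rest of the repo relies on: whenever a module's index m.i is in embed, its output is collected, and the loop returns early once the highest requested index has run. A standalone sketch (the layer indices are the fallback values used later in this file, and yolov10n.pt is the stock checkpoint, so treat the exact shapes as illustrative):

    import torch
    from types import MethodType
    from ultralytics import YOLO

    model = YOLO("yolov10n.pt")
    model.model._predict_once = MethodType(_predict_once, model.model)

    with torch.no_grad():
        dummy = torch.zeros(1, 3, 640, 640)
        feats = model.model._predict_once(dummy, embed=[16, 19, 22])

    print([tuple(f.shape) for f in feats])  # one feature map per requested layer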
def get_yolov10_object_features_with_pooler(feat_list, idxs, boxes, orig_img_shape):
    """
    Extracts object features from YOLOv10 feature maps using RoIAlign.
    Concatenates features from all levels for each detected object.
    """
    # Assuming input image is resized to 640x640
    img_size = 640

    # We need to know the downsampling ratio for each feature map
    # P3 has stride 8, P4 has stride 16, P5 has stride 32
    spatial_scales = [1.0 / 8, 1.0 / 16, 1.0 / 32]

    num_rois = len(boxes)
    if num_rois == 0:
        return [torch.empty(0)], []

    # Add batch index 0 to boxes for ROIAlign
    zeros = torch.full((num_rois, 1), 0, device=boxes.device, dtype=boxes.dtype)
    rois = torch.cat((zeros, boxes), dim=1)

    poolers = [
        ROIAlign(output_size=[7, 7], spatial_scale=ss, sampling_ratio=2) for ss in spatial_scales
    ]

    pooled_feats = []
    for feat_map, pooler in zip(feat_list, poolers):
        pooled_feats.append(pooler(feat_map, rois))

    avg_pool = nn.AdaptiveAvgPool2d((1, 1))

    pooled_feats_flat = [avg_pool(pf).view(num_rois, -1) for pf in pooled_feats]

    # Concatenate features from all levels
    final_feats = torch.cat(pooled_feats_flat, dim=1)

    return [final_feats], pooled_feats
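In isolation, RoIAlign crops each (batch_idx, x1, y1, x2, y2) box out of a feature map at the given spatial_scale and resamples it to a fixed 7x7 grid. A dummy-tensor sketch of the P3 (stride-8) case:

    import torch
    from torchvision.ops import RoIAlign

    p3 = torch.randn(1, 64, 80, 80)  # stride-8 feature map for a 640x640 input
    rois = torch.tensor([[0.0, 64.0, 64.0, 320.0, 320.0]])  # (batch_idx, x1, y1, x2, y2) in pixels
    pooler = RoIAlign(output_size=[7, 7], spatial_scale=1.0 / 8, sampling_ratio=2)
    print(pooler(p3, rois).shape)  # torch.Size([1, 64, 7, 7])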
def get_result_with_features_yolov10_simple(model, imgs, embed_layers, conf=0.25):
    """
    Simplified approach: Use standard YOLO inference first, then extract features.
    """
    if not isinstance(imgs, list):
        imgs = [imgs]

    # First, run standard inference to get proper Results objects
    results = model(imgs, verbose=False, conf=conf)

    # Then extract features for each detected object
    for i, result in enumerate(results):
        if hasattr(result, 'boxes') and len(result.boxes) > 0:
            # Get the preprocessed image that was used for inference
            prepped = model.predictor.preprocess([result.orig_img])

            # --- Temporarily set the embed layers ---
            # Save the previous setting so we can restore it afterwards. Leaving a non-None
            # value in `model.predictor.args.embed` would cause the model to return raw
            # feature maps (instead of standard detection outputs) on the *next* call,
            # which results in missing detections for every image processed after the
            # first one. Restoring the value here ensures normal behaviour for the
            # following iterations.
            prev_embed = getattr(model.predictor.args, "embed", None)
            model.predictor.args.embed = embed_layers

            # Call inference with embedding to get feature maps
            features = model.predictor.inference(prepped)

            # Restore previous embed setting
            model.predictor.args.embed = prev_embed

            # The feature maps are all but the last element of the result
            feature_maps = features[:-1]

            # Extract features for each detected box
            boxes_scaled = result.boxes.xyxy
            # Scale boxes to the preprocessed image size for feature extraction
            boxes_for_features = scale_boxes(result.orig_img.shape, boxes_scaled.clone(), prepped.shape[2:])

            # Create dummy indices (we're not using NMS indices here)
            dummy_idxs = [torch.arange(len(boxes_for_features))]

            # Get features
            obj_feats, pooled_feats = get_yolov10_object_features_with_pooler(feature_maps, dummy_idxs, boxes_for_features, result.orig_img.shape)

            # Add features to the result
            result.feats = obj_feats[0] if obj_feats else torch.empty(0)
            result.pooled_feats = pooled_feats

    return results
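End to end, the function leaves two extra attributes on each Results object. A usage sketch, assuming the monkey patch has been applied as in main() below and a hypothetical image file test.jpg:

    results = get_result_with_features_yolov10_simple(model, "test.jpg", embed_layers, conf=0.25)
    r = results[0]
    print(r.feats.shape)        # (num_boxes, C3 + C4 + C5): one concatenated vector per object
    print(len(r.pooled_feats))  # one pooled 7x7 tensor stack per pyramid level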
def draw_debug_image(img, boxes, class_names, save_path="debug_detections.png", highlight_idx=None):
    """Draw bounding boxes on the original image for debugging."""
    debug_img = img.copy()
    for i, box in enumerate(boxes):
        x1, y1, x2, y2 = box.cpu().numpy().astype(int)
        # Clip coordinates to image bounds
        x1, y1 = max(0, x1), max(0, y1)
        x2, y2 = min(img.shape[1], x2), min(img.shape[0], y2)

        # Highlight the selected object
        color = (0, 0, 255) if i == highlight_idx else (0, 255, 0)  # Red for selected, green for others
        thickness = 3 if i == highlight_idx else 2

        cv2.rectangle(debug_img, (x1, y1), (x2, y2), color, thickness)
        cv2.putText(debug_img, f"{class_names[i]} #{i}", (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2)

    cv2.imwrite(save_path, debug_img)
    print(f"Debug image with bounding boxes saved to {save_path}")
    return debug_img
def draw_feature_heatmap(image, box, feature_map):
    """
    Draws a feature map as a heatmap on a specific region of an image.
    """
    # Detach and move feature map to CPU
    feature_map = feature_map.detach().cpu()

    # Average features across channels to get a 2D heatmap
    heatmap = torch.mean(feature_map, dim=0).numpy()

    # Normalize heatmap to 0-255
    if np.max(heatmap) > np.min(heatmap):
        heatmap = (heatmap - np.min(heatmap)) / (np.max(heatmap) - np.min(heatmap))
    heatmap = (heatmap * 255).astype(np.uint8)

    # Get bounding box coordinates
    x1, y1, x2, y2 = box.cpu().numpy().astype(int)
    x1, y1 = max(0, x1), max(0, y1)
    x2, y2 = min(image.shape[1], x2), min(image.shape[0], y2)

    bbox_w, bbox_h = x2 - x1, y2 - y1
    if bbox_w <= 0 or bbox_h <= 0:
        return image  # return original image

    # Resize heatmap to bounding box size
    heatmap_resized = cv2.resize(heatmap, (bbox_w, bbox_h), interpolation=cv2.INTER_LINEAR)

    # Apply colormap
    heatmap_colored = cv2.applyColorMap(heatmap_resized, cv2.COLORMAP_JET)

    # Get the region of interest from the original image
    roi = image[y1:y2, x1:x2]

    # Blend heatmap with ROI
    overlay = cv2.addWeighted(roi, 0.6, heatmap_colored, 0.4, 0)

    # Place the overlay back onto the image
    output_image = image.copy()
    output_image[y1:y2, x1:x2] = overlay

    return output_image
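The channel-mean / normalize / colormap recipe works on any CxHxW activation, so the function can be exercised with random data; a self-contained sketch:

    import cv2
    import numpy as np
    import torch

    img = np.zeros((480, 640, 3), np.uint8)          # stand-in image
    fmap = torch.randn(64, 7, 7)                     # stand-in pooled feature map
    box = torch.tensor([100.0, 80.0, 300.0, 240.0])  # xyxy box in pixels
    cv2.imwrite("heatmap_demo.png", draw_feature_heatmap(img, box, fmap))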
def draw_filled_rounded_rectangle(img, pt1, pt2, color, radius):
    """Draws a filled rounded rectangle."""
    x1, y1 = pt1
    x2, y2 = pt2

    # Draw circles at the corners
    cv2.circle(img, (x1 + radius, y1 + radius), radius, color, -1)
    cv2.circle(img, (x2 - radius, y1 + radius), radius, color, -1)
    cv2.circle(img, (x1 + radius, y2 - radius), radius, color, -1)
    cv2.circle(img, (x2 - radius, y2 - radius), radius, color, -1)

    # Draw the central rectangles
    cv2.rectangle(img, (x1 + radius, y1), (x2 - radius, y2), color, -1)
    cv2.rectangle(img, (x1, y1 + radius), (x2, y2 - radius), color, -1)
def draw_modern_bbox(image, box, label, color):
    """Draws a modern-style bounding box with a semi-transparent, rounded label."""
    x1, y1, x2, y2 = box.astype(int)

    # Draw the main bounding box outline
    cv2.rectangle(image, (x1, y1), (x2, y2), color, thickness=2)

    # --- Label ---
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.5
    font_thickness = 1
    (text_w, text_h), _ = cv2.getTextSize(label, font, font_scale, font_thickness)

    # Define label background position, handling top-of-image cases
    label_bg_pt1 = (x1, y1 - text_h - 15)
    label_bg_pt2 = (x1 + text_w + 10, y1)
    if label_bg_pt1[1] < 0:
        label_bg_pt1 = (x1, y1 + 5)
        label_bg_pt2 = (x1 + text_w + 10, y1 + text_h + 20)

    # Create an overlay for the semi-transparent background
    overlay = image.copy()

    # Draw the filled rounded rectangle on the overlay
    draw_filled_rounded_rectangle(overlay, label_bg_pt1, label_bg_pt2, color, radius=8)

    # Blend the overlay with the main image
    alpha = 0.6
    cv2.addWeighted(overlay, alpha, image, 1 - alpha, 0, image)

    # Define text position and draw it on the blended image
    text_pt = (label_bg_pt1[0] + 5, label_bg_pt1[1] + text_h + 5)
    cv2.putText(image, label, text_pt, font, font_scale, (0, 0, 0), font_thickness, cv2.LINE_AA)
def generate_feature_heatmaps(model, img_path, embed_layers, output_dir="./", conf=0.25):
    """
    Generates a single composite image containing the main image with bounding boxes
    and separate heatmap snippets for each detected object.

    Args:
        model: YOLOv10 model
        img_path: Path to the input image
        embed_layers: List of layer indices to extract features from
        output_dir: Directory to save outputs
        conf: Object detection confidence threshold
    """

    # Load image
    img = cv2.imread(img_path)
    if img is None:
        raise FileNotFoundError(f"Could not read image at {img_path}")

    print(f"Processing image: {img_path}")

    # Get results with features
    results_with_feat = get_result_with_features_yolov10_simple(model, img_path, embed_layers, conf=conf)

    if not results_with_feat or not isinstance(results_with_feat, list) or len(results_with_feat) == 0:
        print("No results returned.")
        return

    result = results_with_feat[0]
    if not hasattr(result, 'boxes') or len(result.boxes) == 0:
        print("No objects detected in the image.")
        return

    num_objects = len(result.boxes)
    print(f"Total objects detected: {num_objects}. Generating composite layout...")

    # Get class names
    all_class_names = [model.model.names[int(cls)] for cls in result.boxes.cls]

    # --- Step 1: Create the main image with modern bounding boxes ---
    main_image_with_boxes = img.copy()
    colors = [(71, 224, 253), (159, 128, 255), (159, 227, 128), (255, 191, 0), (255, 165, 0), (255, 0, 255)]
    for i in range(num_objects):
        label = f"{all_class_names[i]} {result.boxes.conf[i]:.2f}"
        color = colors[i % len(colors)]
        draw_modern_bbox(main_image_with_boxes, result.boxes.xyxy[i].cpu().numpy(), label, color)

    # --- Step 2: Generate individual heatmap snippets for each object ---
    heatmap_snippets = []
    if hasattr(result, 'pooled_feats') and result.pooled_feats:
        last_layer_pooled_feats = result.pooled_feats[-1]
        for i in range(num_objects):
            box = result.boxes.xyxy[i]
            feature_map = last_layer_pooled_feats[i]

            heatmap_on_full = draw_feature_heatmap(img.copy(), box, feature_map)
            x1, y1, x2, y2 = box.cpu().numpy().astype(int)
            # Clip to image bounds before slicing (same guard as the app.py version)
            x1, y1 = max(0, x1), max(0, y1)
            x2, y2 = min(img.shape[1], x2), min(img.shape[0], y2)
            if x2 <= x1 or y2 <= y1:
                continue
            snippet = heatmap_on_full[y1:y2, x1:x2]

            label_text = f"Obj #{i}: {all_class_names[i]}"
            font = cv2.FONT_HERSHEY_SIMPLEX
            (text_w, text_h), _ = cv2.getTextSize(label_text, font, 0.6, 1)

            h, w, _ = snippet.shape

            # Make the snippet canvas wide enough for the text label
            new_w = max(w, text_w + 10)
            snippet_with_label = np.full((h + text_h + 15, new_w, 3), 255, dtype=np.uint8)

            # Paste the snippet (centered) onto the new canvas
            paste_x = (new_w - w) // 2
            snippet_with_label[0:h, paste_x:paste_x + w] = snippet

            # Draw the label text (centered)
            text_x = (new_w - text_w) // 2
            cv2.putText(snippet_with_label, label_text, (text_x, h + text_h + 5), font, 0.6, (0, 0, 0), 1, cv2.LINE_AA)
            cv2.rectangle(snippet_with_label, (0, 0), (new_w - 1, h + text_h + 14), (180, 180, 180), 1)
            heatmap_snippets.append(snippet_with_label)

    if not heatmap_snippets:
        print("No heatmaps generated. Saving image with bounding boxes only.")
        image_name = Path(img_path).stem
        save_path = Path(output_dir) / f"{image_name}_layout.png"
        cv2.imwrite(str(save_path), main_image_with_boxes)
        return

    # --- Step 3: Arrange snippets and main image into a final composite image ---
    main_h, main_w, _ = main_image_with_boxes.shape
    padding = 20

    # Arrange snippets into a horizontal row
    snippets_row_h = max(s.shape[0] for s in heatmap_snippets)
    total_snippets_w = sum(s.shape[1] for s in heatmap_snippets) + (len(heatmap_snippets) - 1) * 10

    snippets_row = np.full((snippets_row_h, total_snippets_w, 3), 255, dtype=np.uint8)
    current_x = 0
    for snippet in heatmap_snippets:
        h, w, _ = snippet.shape
        paste_y = (snippets_row_h - h) // 2
        snippets_row[paste_y:paste_y + h, current_x:current_x + w] = snippet
        current_x += w + 10

    # Create the final canvas and place the main image and the snippet row
    canvas_h = main_h + snippets_row_h + 3 * padding
    canvas_w = max(main_w, total_snippets_w) + 2 * padding
    final_image = np.full((canvas_h, canvas_w, 3), 255, dtype=np.uint8)

    # Paste main image at top-center
    x_offset_main = (canvas_w - main_w) // 2
    final_image[padding:padding + main_h, x_offset_main:x_offset_main + main_w] = main_image_with_boxes

    # Paste snippet row at bottom-center
    x_offset_snippets = (canvas_w - total_snippets_w) // 2
    y_offset_snippets = main_h + 2 * padding
    final_image[y_offset_snippets:y_offset_snippets + snippets_row_h, x_offset_snippets:x_offset_snippets + total_snippets_w] = snippets_row

    # --- Step 4: Save the final composite image ---
    image_name = Path(img_path).stem
    heatmap_path = Path(output_dir) / f"{image_name}_heatmap_layout.png"
    cv2.imwrite(str(heatmap_path), final_image)
    print(f" - Saved composite heatmap layout to: {heatmap_path}")
def main():
    parser = argparse.ArgumentParser(description='Generate a composite feature heatmap for all detected objects in an image or a directory of images.')
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('--image', '-i', type=str, help='Path to a single input image.')
    group.add_argument('--input-dir', '-d', type=str, help='Path to a directory of input images.')

    parser.add_argument('--model', '-m', type=str, default='yolov10n.pt', help='Path to YOLOv10 model')
    parser.add_argument('--output', '-o', type=str, default='./heatmaps', help='Output directory for generated layouts.')
    parser.add_argument('--conf', type=float, default=0.25, help='Object detection confidence threshold (e.g., 0.1 for more detections).')

    args = parser.parse_args()

    # Create output directory if it doesn't exist
    Path(args.output).mkdir(parents=True, exist_ok=True)

    # Load YOLOv10 model
    print(f"Loading model: {args.model}")
    model = YOLO(args.model)

    # Monkey patch the model's prediction method
    model.model._predict_once = MethodType(_predict_once, model.model)

    # Initialize the predictor by running a dummy inference
    model(np.zeros((640, 640, 3)), verbose=False)

    # Dynamically find the feature map layer indices from the model
    detect_layer_index = -1
    for i, m in enumerate(model.model.model):
        if 'Detect' in type(m).__name__:
            detect_layer_index = i
            break

    if detect_layer_index != -1:
        input_layers_indices = model.model.model[detect_layer_index].f
        embed_layers = sorted(input_layers_indices) + [detect_layer_index]
        print(f"Auto-detected feature layers at indices: {input_layers_indices}")
        print(f"Embedding features from layers: {embed_layers}")
    else:
        print("Could not find Detect layer, falling back to hardcoded indices")
        embed_layers = [16, 19, 22, 23]

    # Process either a single image or a directory of images
    if args.input_dir:
        input_path = Path(args.input_dir)
        image_extensions = ['*.jpg', '*.jpeg', '*.png', '*.bmp', '*.tif', '*.tiff']
        image_files = []
        for ext in image_extensions:
            image_files.extend(input_path.glob(ext))

        if not image_files:
            print(f"No images found in '{args.input_dir}'.")
            return

        print(f"\nFound {len(image_files)} images in '{args.input_dir}'. Processing...")
        for img_path in image_files:
            generate_feature_heatmaps(
                model=model,
                img_path=str(img_path),
                embed_layers=embed_layers,
                output_dir=args.output,
                conf=args.conf
            )
    else:  # if args.image
        generate_feature_heatmaps(
            model=model,
            img_path=args.image,
            embed_layers=embed_layers,
            output_dir=args.output,
            conf=args.conf
        )

    print(f"\nProcessing complete. All layouts saved to '{args.output}'.")
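For reference, a typical invocation of the CLI defined above (test.jpg is a hypothetical input): python yolov10_RoIFX.py --image test.jpg --model yolov10n.pt --output ./heatmaps --conf 0.25, which writes test_heatmap_layout.png into ./heatmaps.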
if __name__ == "__main__":
    # If run without arguments, use test image
    import sys
    if len(sys.argv) == 1:
        print("No arguments provided. Running heatmap generation on a test image.")

        # Load YOLOv10 model
        print("Loading default model: yolov10n.pt")
        model = YOLO('yolov10n.pt')
        model.model._predict_once = MethodType(_predict_once, model.model)
        model(np.zeros((640, 640, 3)), verbose=False)

        # Auto-detect layers
        detect_layer_index = -1
        for i, m in enumerate(model.model.model):
            if 'Detect' in type(m).__name__:
                detect_layer_index = i
                break

        if detect_layer_index != -1:
            input_layers_indices = model.model.model[detect_layer_index].f
            embed_layers = sorted(input_layers_indices) + [detect_layer_index]
            print(f"Auto-detected feature layers at indices: {input_layers_indices}")
        else:
            embed_layers = [16, 19, 22, 23]

        # Define test image path
        img_path = "/home/hew/yolov10FX_obj/id-1.jpg"

        # Generate heatmaps for the test image
        print("Using a lower confidence of 0.1 for test mode to find more objects.")
        generate_feature_heatmaps(
            model=model,
            img_path=img_path,
            embed_layers=embed_layers,
            output_dir="./",
            conf=0.1
        )
        print("\nHeatmap generation completed successfully for test image!")

    else:
        main()