chenchangliu committed
Commit 722bb9c · verified · 1 Parent(s): 27c6faa

Upload 2 files

Files changed (2)
  1. app.py +119 -0
  2. pipeline.py +311 -0
app.py ADDED
@@ -0,0 +1,119 @@
#!/usr/bin/env python3
"""
Gradio app for HuggingFace Spaces.
Wraps the LTN localize-and-classify pipeline with a simple web UI.
"""
import tempfile
from pathlib import Path

import torch
import gradio as gr
from PIL import Image
from torchvision.io import read_image

from pipeline import (
    TAXON_NAMES, STATE_NAMES,
    DET_CONF, YOLO_WEIGHTS, CLF_WEIGHTS,
    load_classifier, classify_crops, annotate_image,
)
from ultralytics import YOLO


# ---------------------------------------------------------------------------
# Load models once at startup
# ---------------------------------------------------------------------------

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
yolo = YOLO(str(YOLO_WEIGHTS))
classifier = load_classifier(CLF_WEIGHTS, DEVICE)


# ---------------------------------------------------------------------------
# Inference
# ---------------------------------------------------------------------------

def predict(image: Image.Image, conf: float):
    if image is None:
        return None, "No image provided."

    # Save PIL image to a temp file — YOLO and read_image both need a path.
    # Convert to RGB first so RGBA uploads (e.g. PNGs with alpha) can be
    # written as JPEG.
    with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as f:
        tmp_in = Path(f.name)
    image.convert("RGB").save(tmp_in)
    tmp_out = tmp_in.with_name(tmp_in.stem + "_out.jpg")

    try:
        # 1 — Detect
        det = yolo.predict(str(tmp_in), conf=conf, verbose=False)[0]
        boxes = det.boxes.xyxy.cpu().tolist()
        det_confs = det.boxes.conf.cpu().tolist()

        if not boxes:
            return image, "No cells detected. Try lowering the confidence threshold."

        # 2 — Crop
        img_tensor = read_image(str(tmp_in))
        if img_tensor.shape[0] == 4:  # drop alpha channel if present
            img_tensor = img_tensor[:3]

        crops = [
            img_tensor[:, int(y1):int(y2), int(x1):int(x2)]
            for x1, y1, x2, y2 in boxes
        ]

        # 3 — Classify
        predictions = classify_crops(crops, classifier, DEVICE)

        # 4 — Annotate
        annotate_image(tmp_in, boxes, predictions, det_confs, tmp_out)
        result_img = Image.open(tmp_out).copy()

        # Build the results text, one line per detected cell
        lines = [f"{len(boxes)} cell(s) detected\n"]
        for i, (taxon_idx, state_idx, tx_conf, st_conf) in enumerate(predictions):
            lines.append(
                f"[{i + 1}] {TAXON_NAMES[taxon_idx]} ({tx_conf:.0%})"
                f" — {STATE_NAMES[state_idx]} ({st_conf:.0%})"
            )

        return result_img, "\n".join(lines)

    finally:
        tmp_in.unlink(missing_ok=True)
        tmp_out.unlink(missing_ok=True)


# ---------------------------------------------------------------------------
# UI
# ---------------------------------------------------------------------------

with gr.Blocks(title="LTN Brood Cell Classifier") as demo:
    gr.Markdown(
        "# LTN Brood Cell Classifier\n"
        "Upload a Layer Trap Nest image. "
        "YOLOv8 localizes each brood cell; EfficientNet classifies its **taxon** and **state**."
    )

    with gr.Row():
        with gr.Column():
            inp_image = gr.Image(type="pil", label="Input image")
            conf_slider = gr.Slider(
                minimum=0.1, maximum=1.0, value=DET_CONF, step=0.05,
                label="Detection confidence threshold",
                info="Raise to keep only high-confidence detections.",
            )
            run_btn = gr.Button("Run", variant="primary")

        with gr.Column():
            out_image = gr.Image(type="pil", label="Annotated output")
            out_text = gr.Textbox(label="Predictions", lines=12)

    run_btn.click(
        fn=predict,
        inputs=[inp_image, conf_slider],
        outputs=[out_image, out_text],
    )


if __name__ == "__main__":
    demo.launch()
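
The app can also be smoke-tested without the UI by calling predict directly. A minimal sketch, assuming the two weight files are present (importing app loads both models at module level); "test.jpg" is a hypothetical local image, not part of the commit:

# Hypothetical smoke test for app.py; "test.jpg" is a placeholder path.
from PIL import Image
from app import predict, DET_CONF

img = Image.open("test.jpg")
annotated, report = predict(img, DET_CONF)  # same entry point the Run button uses
print(report)
if annotated is not None:
    annotated.save("test_annotated.jpg")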
pipeline.py ADDED
@@ -0,0 +1,311 @@
#!/usr/bin/env python3
"""
LTN Pipeline: YOLOv8 localization → EfficientNet two-head classification.

Detects brood cells in Layer Trap Nest images, classifies each crop by
taxon and state, and saves annotated output images.

Usage:
    python pipeline.py image.jpg
    python pipeline.py images/                      # process a whole directory
    python pipeline.py a.jpg b.jpg --out results/ --conf 0.3
"""
from __future__ import annotations

import argparse
from pathlib import Path

import torch

# ---------------------------------------------------------------------------
# CONFIG — edit these instead of passing CLI flags every time
# ---------------------------------------------------------------------------

YOLO_WEIGHTS = Path("yolov8_localizer.pt")
CLF_WEIGHTS = Path("effnet_two_head_classifier.pt")
OUTPUT_DIR = Path("pipeline_out")
DET_CONF = 0.5    # YOLO detection confidence threshold (0–1); raise to be more strict
BATCH_SIZE = 32   # classifier batch size
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# ---------------------------------------------------------------------------

import torch.nn as nn
import torchvision.transforms.functional as TF
from torchvision.models import efficientnet_b0
from torchvision.io import read_image
from torchvision import transforms
from PIL import Image, ImageDraw, ImageFont
from ultralytics import YOLO


# ---------------------------------------------------------------------------
# Class labels (display names only; indices must match the trained model)
# ---------------------------------------------------------------------------

TAXON_NAMES = [
    "Anthidium", "Cacoxenus indagator", "Chelostoma campanularum",
    "Chelostoma florisomne", "Chelostoma rapunculi", "Coelopencyrtus",
    "Eumenidae", "Heriades", "Hylaeus", "Ichneumonidae", "Isodontia mexicana",
    "Megachile", "Osmia bicornis", "Osmia brevicornis", "Osmia cornuta",
    "Passaloecus", "Pemphredon", "Psenulus", "Trichodes", "Trypoxylon",
]

STATE_NAMES = ["DauLv", "DeadLv", "Hatched", "Lv", "OldFood"]

# One distinct colour per state (RGB)
STATE_COLORS = [
    (230, 130,   0),  # DauLv   - amber
    (210,  30,  45),  # DeadLv  - crimson
    ( 40, 180,  60),  # Hatched - green
    ( 30, 140, 240),  # Lv      - blue
    (150,  50, 220),  # OldFood - purple
]


# ---------------------------------------------------------------------------
# Preprocessing (must match training)
# ---------------------------------------------------------------------------

class Letterbox:
    """Resize so the longer side = `size`, pad the shorter side to square."""
    def __init__(self, size: int = 224, fill: int = 0):
        self.size = size
        self.fill = fill

    def __call__(self, x: torch.Tensor) -> torch.Tensor:
        _, h, w = x.shape
        scale = self.size / max(h, w)
        new_h, new_w = int(round(h * scale)), int(round(w * scale))
        x = TF.resize(x, [new_h, new_w], antialias=True)
        pad_h, pad_w = self.size - new_h, self.size - new_w
        pad_top, pad_left = pad_h // 2, pad_w // 2
        x = TF.pad(x, [pad_left, pad_top, pad_w - pad_left, pad_h - pad_top], fill=self.fill)
        return x


_letterbox = Letterbox(224, fill=0)
_normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])


def preprocess(crop: torch.Tensor) -> torch.Tensor:
    """CHW uint8 RGB tensor → normalized 224×224 float tensor."""
    return _normalize(_letterbox(crop.float() / 255.0))


# ---------------------------------------------------------------------------
# Model
# ---------------------------------------------------------------------------

class EffNetTwoHead(nn.Module):
    """EfficientNet-B0 backbone with separate species and state heads."""

    def __init__(self, num_species: int, num_states: int):
        super().__init__()
        base = efficientnet_b0(weights=None)
        self.features = base.features
        self.pool = base.avgpool
        c = base.classifier[1].in_features
        self.drop = nn.Dropout(0.3)
        self.head_species = nn.Linear(c, num_species)
        self.head_state = nn.Linear(c, num_states)

    def forward(self, x: torch.Tensor):
        x = self.features(x)
        x = self.pool(x)
        x = torch.flatten(x, 1)
        x = self.drop(x)
        return self.head_species(x), self.head_state(x)


def load_classifier(ckpt_path: Path, device: str) -> EffNetTwoHead:
    # Checkpoint is a dict with keys "model" (state_dict), "num_species", "num_states"
    ckpt = torch.load(ckpt_path, map_location=device)
    model = EffNetTwoHead(int(ckpt["num_species"]), int(ckpt["num_states"])).to(device)
    model.load_state_dict(ckpt["model"], strict=True)
    model.eval()
    return model


# ---------------------------------------------------------------------------
# Inference
# ---------------------------------------------------------------------------

@torch.no_grad()
def classify_crops(
    crops: list[torch.Tensor],
    model: EffNetTwoHead,
    device: str,
    batch_size: int = 32,
) -> list[tuple[int, int, float, float]]:
    """
    Args:
        crops: list of CHW uint8 tensors (RGB)
    Returns:
        list of (taxon_idx, state_idx, taxon_conf, state_conf)
    """
    results = []
    for i in range(0, len(crops), batch_size):
        batch = torch.stack([preprocess(c) for c in crops[i : i + batch_size]]).to(device)
        lsp, lst = model(batch)
        sp_conf, sp_idx = lsp.softmax(1).max(1)
        st_conf, st_idx = lst.softmax(1).max(1)
        for k in range(len(sp_idx)):
            results.append((sp_idx[k].item(), st_idx[k].item(), sp_conf[k].item(), st_conf[k].item()))
    return results


# ---------------------------------------------------------------------------
# Visualisation
# ---------------------------------------------------------------------------

def _load_font(size: int) -> ImageFont.FreeTypeFont | ImageFont.ImageFont:
    for name in ["Arial.ttf", "DejaVuSans.ttf", "LiberationSans-Regular.ttf", "Helvetica.ttc"]:
        try:
            return ImageFont.truetype(name, size)
        except Exception:
            pass
    return ImageFont.load_default()


def annotate_image(
    img_path: Path,
    boxes: list[list[float]],
    predictions: list[tuple[int, int, float, float]],
    det_confs: list[float],
    out_path: Path,
) -> None:
    img = Image.open(img_path).convert("RGB")
    draw = ImageDraw.Draw(img)

    # Scale line width and font size with image resolution
    ref = max(img.width, img.height)
    lw = max(2, ref // 500)
    font_size = max(12, ref // 70)
    font = _load_font(font_size)
    pad = max(3, font_size // 4)

    for box, (taxon_idx, state_idx, tx_conf, st_conf), det_conf in zip(boxes, predictions, det_confs):
        x1, y1, x2, y2 = (int(v) for v in box)
        color = STATE_COLORS[state_idx % len(STATE_COLORS)]

        # Bounding box
        draw.rectangle([x1, y1, x2, y2], outline=color, width=lw)

        line1 = f"{TAXON_NAMES[taxon_idx]} {tx_conf:.0%}"
        line2 = f"{STATE_NAMES[state_idx]} {st_conf:.0%}"

        # Measure both lines
        bb1 = draw.textbbox((0, 0), line1, font=font)
        bb2 = draw.textbbox((0, 0), line2, font=font)
        tw = max(bb1[2] - bb1[0], bb2[2] - bb2[0])
        th = bb1[3] - bb1[1]  # assume same line height

        label_h = 2 * th + 3 * pad  # height of label block

        # Place label above box; if not enough room, place it inside the box top
        if y1 >= label_h:
            lx1, ly1 = x1, y1 - label_h
        else:
            lx1, ly1 = x1, y1 + lw

        draw.rectangle([lx1, ly1, lx1 + tw + 2 * pad, ly1 + label_h], fill=color)
        draw.text((lx1 + pad, ly1 + pad), line1, fill=(255, 255, 255), font=font)
        draw.text((lx1 + pad, ly1 + pad + th + pad), line2, fill=(255, 255, 255), font=font)

    out_path.parent.mkdir(parents=True, exist_ok=True)
    img.save(out_path)


# ---------------------------------------------------------------------------
# Pipeline
# ---------------------------------------------------------------------------

def run_pipeline(
    img_path: Path,
    yolo: YOLO,
    classifier: EffNetTwoHead,
    device: str,
    conf: float,
    out_dir: Path,
    batch_size: int = BATCH_SIZE,
) -> None:
    print(f"\n{img_path.name}")

    # 1 — Detect cells
    det = yolo.predict(str(img_path), conf=conf, verbose=False)[0]
    boxes = det.boxes.xyxy.cpu().tolist()
    det_confs = det.boxes.conf.cpu().tolist()

    if not boxes:
        print("  No detections.")
        return

    print(f"  {len(boxes)} cell(s) detected")

    # 2 — Crop each detection from the original image
    img_tensor = read_image(str(img_path))
    if img_tensor.shape[0] == 4:  # drop alpha channel if present
        img_tensor = img_tensor[:3]

    crops = [
        img_tensor[:, int(y1):int(y2), int(x1):int(x2)]
        for x1, y1, x2, y2 in boxes
    ]

    # 3 — Classify all crops
    predictions = classify_crops(crops, classifier, device, batch_size=batch_size)

    # 4 — Annotate and save
    out_path = out_dir / (img_path.stem + "_annotated" + img_path.suffix)
    annotate_image(img_path, boxes, predictions, det_confs, out_path)

    for i, (taxon_idx, state_idx, tx_conf, st_conf) in enumerate(predictions):
        print(f"  [{i + 1}] {TAXON_NAMES[taxon_idx]} ({tx_conf:.0%}) — {STATE_NAMES[state_idx]} ({st_conf:.0%})")

    print(f"  → {out_path}")


# ---------------------------------------------------------------------------
# Entry point
# ---------------------------------------------------------------------------

IMG_EXTS = {".jpg", ".jpeg", ".png", ".webp", ".bmp", ".tiff", ".tif"}


def main() -> None:
    ap = argparse.ArgumentParser(description="LTN localize-and-classify pipeline")
    ap.add_argument("input", type=Path, nargs="+", help="Image file(s) or directory(ies)")
    ap.add_argument("--yolo", type=Path, default=YOLO_WEIGHTS, help="YOLOv8 weights")
    ap.add_argument("--clf", type=Path, default=CLF_WEIGHTS, help="Classifier checkpoint")
    ap.add_argument("--out", type=Path, default=OUTPUT_DIR, help="Output directory")
    ap.add_argument("--conf", type=float, default=DET_CONF, help="YOLO detection confidence threshold")
    ap.add_argument("--device", type=str, default=DEVICE)
    ap.add_argument("--batch", type=int, default=BATCH_SIZE, help="Classifier batch size")
    args = ap.parse_args()

    # Collect all image paths
    img_paths: list[Path] = []
    for p in args.input:
        if p.is_dir():
            img_paths.extend(f for f in sorted(p.iterdir()) if f.suffix.lower() in IMG_EXTS)
        elif p.suffix.lower() in IMG_EXTS:
            img_paths.append(p)
        else:
            print(f"Warning: skipping {p} (not a recognised image or directory)")

    if not img_paths:
        raise SystemExit("No valid image files found.")

    print(f"Device : {args.device}")
    print(f"Images : {len(img_paths)}")
    print(f"Loading YOLOv8 from {args.yolo}")
    yolo = YOLO(str(args.yolo))

    print(f"Loading classifier from {args.clf}")
    classifier = load_classifier(args.clf, args.device)

    for img_path in img_paths:
        run_pipeline(img_path, yolo=yolo, classifier=classifier,
                     device=args.device, conf=args.conf,
                     out_dir=args.out, batch_size=args.batch)

    print("\nDone. Results saved to:", args.out)


if __name__ == "__main__":
    main()
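
The pipeline can also be driven from Python instead of the CLI. A minimal sketch, assuming the repo's two weight files sit in the working directory; "nest.jpg" is a hypothetical input image:

# Hypothetical programmatic use of pipeline.py; "nest.jpg" is a placeholder.
from pathlib import Path

from ultralytics import YOLO

from pipeline import (
    YOLO_WEIGHTS, CLF_WEIGHTS, DET_CONF, OUTPUT_DIR,
    load_classifier, run_pipeline,
)

yolo = YOLO(str(YOLO_WEIGHTS))
classifier = load_classifier(CLF_WEIGHTS, "cpu")  # or "cuda" if available

# Annotated output lands in OUTPUT_DIR (pipeline_out/ by default)
run_pipeline(Path("nest.jpg"), yolo=yolo, classifier=classifier,
             device="cpu", conf=DET_CONF, out_dir=OUTPUT_DIR)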