# File size: 8,594 Bytes
#!/usr/bin/env python3
"""
Material Surface Classifier — Inference Script
================================================
Classify images of surfaces into 5 material categories:
  asphalt · concrete · metal · wood · other

Supports:
  - Single image, multiple images, or entire directories
  - CLI and programmatic (Python import) usage
  - Local model path or Hugging Face Hub model ID
  - JSON or human-readable output
  - Confidence thresholding
  - Batch processing

Requirements:
  pip install transformers torch pillow timm

Usage (CLI):
  # Single image
  python inference.py photo.jpg

  # Multiple images
  python inference.py img1.jpg img2.png img3.jpg

  # Directory of images
  python inference.py path/to/image_dir/

  # With custom model path (local or Hub)
  python inference.py photo.jpg --model models/material_surface
  python inference.py photo.jpg --model dacanizalesconvers/material-surface-classifier

  # JSON output
  python inference.py photo.jpg --json

  # With confidence threshold (flag low-confidence predictions)
  python inference.py photo.jpg --threshold 0.5

Usage (Python):
  from inference import MaterialSurfaceClassifier

  clf = MaterialSurfaceClassifier("dacanizalesconvers/material-surface-classifier")
  result = clf.predict("photo.jpg")
  # => {"label": "concrete", "confidence": 0.94, "scores": {"concrete": 0.94, ...}}

  results = clf.predict_batch(["a.jpg", "b.jpg"])
"""

import argparse
import json
import sys
from pathlib import Path
from typing import Union, Optional

import torch
from PIL import Image
from transformers import AutoImageProcessor, AutoModelForImageClassification, pipeline


# ─── Constants ────────────────────────────────────────────────────────────────
# Hub model ID used as the default for MaterialSurfaceClassifier(model_path=...)
# and for the CLI --model option.
DEFAULT_MODEL = "dacanizalesconvers/material-surface-classifier"
# The five material categories named in the module docstring, listed alphabetically.
# NOTE(review): not referenced by the code in this file; presumably mirrors the
# model's own label set — confirm against the model config.
LABELS = ["asphalt", "concrete", "metal", "other", "wood"]
# Lower-case file suffixes that collect_images() accepts as image files.
IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png", ".bmp", ".gif", ".tiff", ".webp"}


# ─── Programmatic API ────────────────────────────────────────────────────────
class MaterialSurfaceClassifier:
    """
    Material surface classifier for programmatic use.

    Thin wrapper around a Hugging Face ``image-classification`` pipeline that
    returns plain dicts instead of the pipeline's raw list output.

    Args:
        model_path: Local path or HF Hub model ID.
        device:     "cuda", "cpu", or "mps". When None, "cuda" is selected if
                    ``torch.cuda.is_available()`` is true and "cpu" otherwise;
                    "mps" is only used when requested explicitly.

    Attributes:
        device: The device string that was passed to the pipeline.
        pipe:   The underlying ``transformers`` pipeline object.

    Example:
        clf = MaterialSurfaceClassifier("dacanizalesconvers/material-surface-classifier")
        result = clf.predict("photo.jpg")
        print(result["label"], result["confidence"])
    """

    def __init__(self, model_path: str = DEFAULT_MODEL, device: Optional[str] = None):
        """Load the model and build the classification pipeline on ``device``."""
        if device is None:
            device = "cuda" if torch.cuda.is_available() else "cpu"
        self.device = device
        self.pipe = pipeline(
            "image-classification",
            model=model_path,
            device=device,
        )

    def predict(self, image: Union[str, Path, Image.Image], top_k: int = 5) -> dict:
        """
        Classify a single image.

        Args:
            image:  File path (str/Path) or PIL Image. Paths are opened and
                    converted to RGB; PIL Images are passed to the pipeline
                    as given.
            top_k:  Number of top predictions to return (forwarded to the
                    pipeline).

        Returns:
            dict with keys:
              label      - label of the first (top) pipeline entry
              confidence - that entry's score, rounded to 4 decimals
              scores     - {label: rounded score} for every returned entry

        Raises:
            TypeError:  If ``image`` is not a str, Path, or PIL Image.
            ValueError: If the pipeline returns no predictions.
        """
        if isinstance(image, (str, Path)):
            # Image.open() is lazy and keeps the underlying file open. The
            # context manager guarantees the handle is released, while
            # convert() hands back an independent in-memory copy.
            with Image.open(image) as source:
                image = source.convert("RGB")
        elif not isinstance(image, Image.Image):
            raise TypeError(f"Expected str, Path, or PIL.Image, got {type(image)}")

        raw = self.pipe(image, top_k=top_k)
        if not raw:
            # Without this guard an empty result surfaces as an opaque
            # IndexError on raw[0].
            raise ValueError(f"Model returned no predictions (top_k={top_k})")

        best = raw[0]
        return {
            "label": best["label"],
            "confidence": round(best["score"], 4),
            "scores": {r["label"]: round(r["score"], 4) for r in raw},
        }

    def predict_batch(
        self,
        images: list,
        top_k: int = 5,
        threshold: float = 0.0,
    ) -> list:
        """
        Classify a list of images one at a time.

        A failure on one image does not abort the batch: the exception text
        is stored under "error" for that entry and processing continues.

        Args:
            images:    List of file paths (str/Path) or PIL Images.
            top_k:     Number of top predictions per image.
            threshold: Minimum confidence to accept a prediction. Compared
                       against the rounded "confidence" value.

        Returns:
            List of dicts in input order, each with: file (only when the
            input was a path), label, confidence, scores, and optionally
            below_threshold (True) or error (str) in place of the
            prediction keys.
        """
        results = []
        for img in images:
            entry = {}
            if isinstance(img, (str, Path)):
                entry["file"] = str(img)
            try:
                result = self.predict(img, top_k=top_k)
            except Exception as e:
                # Deliberate best-effort: report the failure per image
                # instead of aborting the remaining inputs.
                entry["error"] = str(e)
            else:
                entry.update(result)
                if result["confidence"] < threshold:
                    entry["below_threshold"] = True
            results.append(entry)
        return results


# ─── CLI helpers ──────────────────────────────────────────────────────────────
def collect_images(paths: list) -> list:
    """Gather image file paths from files and directories.

    Directories are scanned non-recursively. A directory entry is kept when
    it is a regular file whose suffix, compared case-insensitively, is in
    IMAGE_EXTENSIONS; the matches of each directory are returned in sorted
    order. Explicit file arguments are checked against the same suffix rule.
    Anything else is skipped with a warning on stderr.

    Args:
        paths: File and/or directory paths (str or Path).

    Returns:
        List of Path objects, in the order the inputs were given.
    """
    image_paths = []
    for p in paths:
        p = Path(p)
        if p.is_dir():
            # One pass over the directory with a case-insensitive suffix test:
            # deterministic order, no duplicates where glob matching ignores
            # case, and mixed-case suffixes such as ".Jpg" are not missed.
            image_paths.extend(sorted(
                child for child in p.iterdir()
                if child.is_file() and child.suffix.lower() in IMAGE_EXTENSIONS
            ))
        elif p.is_file() and p.suffix.lower() in IMAGE_EXTENSIONS:
            image_paths.append(p)
        else:
            print(f"⚠️  Skipping: {p} (not a recognised image file or directory)",
                  file=sys.stderr)
    return image_paths


def print_results(results: list, as_json: bool = False):
    """Write classification results to stdout.

    Args:
        results: List of result dicts. An entry containing "error" is shown
                 as a failure line; any other entry must provide "label",
                 "confidence" and "scores", and may provide "file" and
                 "below_threshold".
        as_json: When true, dump ``results`` as indented JSON and print
                 nothing else.
    """
    if as_json:
        print(json.dumps(results, indent=2))
        return

    for entry in results:
        if "error" in entry:
            # Failed images get a single line; '?' stands in for a missing path.
            source = entry.get('file', '?')
            print(f"❌ {source}: {entry['error']}")
            continue

        # Low-confidence predictions get a warning marker instead of a tick.
        if entry.get("below_threshold"):
            marker = "⚠️ "
        else:
            marker = "✅"
        title = entry.get("file", "<image>")
        print(f"{marker} {title}")

        print(f"   Prediction: {entry['label']} ({entry['confidence']:.1%})")

        formatted = [
            f"{name}: {score:.1%}" for name, score in entry["scores"].items()
        ]
        print(f"   All scores: {' | '.join(formatted)}")
        print()


# ─── CLI entry-point ─────────────────────────────────────────────────────────
def main():
    """Command-line entry point.

    Parses the arguments, collects the image files, classifies them with
    MaterialSurfaceClassifier and prints the results. Exits with status 1
    when no valid image is found, and with argparse's usage error when
    --top-k is below 1.

    With --json, stdout carries only the JSON document; the progress lines
    are written to stderr so the output can be piped into a JSON parser.
    Without --json, progress, results and a per-label summary all go to
    stdout.
    """
    from collections import Counter

    parser = argparse.ArgumentParser(
        description=(
            "Material Surface Classifier — classify images into: "
            "asphalt, concrete, metal, wood, other"
        ),
    )
    parser.add_argument(
        "inputs", nargs="+",
        help="Image file(s) or director(y/ies) to classify",
    )
    parser.add_argument(
        "--model", default=DEFAULT_MODEL,
        help=f"Model path or Hub ID (default: {DEFAULT_MODEL})",
    )
    parser.add_argument(
        "--device", default=None,
        help="Device: cuda / cpu / mps (default: auto-detect)",
    )
    parser.add_argument(
        "--threshold", type=float, default=0.0,
        help="Flag predictions below this confidence (default: 0.0)",
    )
    parser.add_argument(
        "--top-k", type=int, default=5,
        help="Number of top predictions to return (default: 5)",
    )
    parser.add_argument(
        "--json", action="store_true",
        help="Output results as JSON",
    )

    args = parser.parse_args()

    # Reject a meaningless --top-k before the (slow) model load; otherwise
    # every image would come back as an error.
    if args.top_k < 1:
        parser.error("--top-k must be at least 1")

    # Keep stdout machine-readable in JSON mode: progress goes to stderr.
    status_stream = sys.stderr if args.json else sys.stdout

    # Discover images
    image_paths = collect_images(args.inputs)
    if not image_paths:
        print("❌ No valid image files found.", file=sys.stderr)
        sys.exit(1)

    print(f"🔍 Found {len(image_paths)} image(s) to classify", file=status_stream)
    print(f"📦 Loading model: {args.model}\n", file=status_stream)

    # Load & run
    clf = MaterialSurfaceClassifier(args.model, device=args.device)
    results = clf.predict_batch(image_paths, top_k=args.top_k, threshold=args.threshold)

    # Output
    print_results(results, as_json=args.json)

    # Summary (human-readable mode only): images per predicted label,
    # most frequent first; failed images are not counted.
    if not args.json:
        preds = [r["label"] for r in results if "error" not in r]
        if preds:
            counts = Counter(preds)
            print("─" * 40)
            print("📊 Summary:")
            for label, count in counts.most_common():
                print(f"   {label}: {count} image(s)")

# Run the CLI only when this file is executed as a script; importing it
# (e.g. `from inference import MaterialSurfaceClassifier`) does not call main().
if __name__ == "__main__":
    main()